Coverage for python/lsst/daf/butler/registry/managers.py: 32%

148 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ( 

31 "RegistryManagerInstances", 

32 "RegistryManagerTypes", 

33) 

34 

35import dataclasses 

36import logging 

37from collections.abc import Mapping 

38from typing import Any, Generic, TypeVar 

39 

40import sqlalchemy 

41from lsst.utils import doImportType 

42 

43from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl 

44from ._config import RegistryConfig 

45from .interfaces import ( 

46 ButlerAttributeManager, 

47 CollectionManager, 

48 Database, 

49 DatasetRecordStorageManager, 

50 DatastoreRegistryBridgeManager, 

51 DimensionRecordStorageManager, 

52 ObsCoreTableManager, 

53 OpaqueTableStorageManager, 

54 StaticTablesContext, 

55 VersionedExtension, 

56 VersionTuple, 

57) 

58from .versions import ButlerVersionsManager 

59 

# Type variables for the generic manager struct, one per manager slot. They
# are bound either to manager classes or to manager instances by the concrete
# subclasses defined in this module.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


_LOG = logging.getLogger(__name__)

# Key in the attributes table under which the dimensions configuration
# (serialized as JSON) is kept.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Key in the attributes table under which the obscore configuration
# (serialized as JSON) is kept.
_OBSCORE_ATTR = "config:obscore.json"

76 

77 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic struct bundling the managers that back a `Registry`.

    Depending on how the type parameters are bound, each field holds either
    a manager type or a manager instance.  Do not use this class directly;
    use its non-generic subclasses `RegistryManagerInstances` and
    `RegistryManagerTypes` instead.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

    obscore: _ObsCore | None
    """Manager for `ObsCore` table(s); the only manager that may be `None`."""

116 

117 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        type[ButlerAttributeManager],
        type[DimensionRecordStorageManager],
        type[CollectionManager],
        type[DatasetRecordStorageManager],
        type[OpaqueTableStorageManager],
        type[DatastoreRegistryBridgeManager],
        type[ObsCoreTableManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Per-manager schema versions defined by configuration, optional."""

    @classmethod
    def _manager_names(cls) -> list[str]:
        """Return the names of the fields that hold manager types.

        Returns
        -------
        names : `list` [`str`]
            Field names in declaration order, excluding the auxiliary
            ``manager_configs`` and ``schema_versions`` fields.
        """
        extras = {"manager_configs", "schema_versions"}
        return [field.name for field in dataclasses.fields(cls) if field.name not in extras]

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager configuration section has no "cls" key, or if
            a manager configuration is neither a `Config`, a `str`, nor
            `None`.
        ValueError
            Raised if a manager configuration section has keys other than
            "cls", "config" and "schema_version".
        """
        # We only check for manager names defined in class attributes.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers_config = config["managers"]
        # Values of "config" sub-key, if any, indexed by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, type] = {}
        # Iterate in field declaration order (not over a set) so that the
        # order of imports, and the error reported first when several entries
        # are bad, does not depend on string hash randomization.
        for manager in cls._manager_names():
            manager_config = managers_config.get(manager)
            if isinstance(manager_config, Config):
                # Expect "cls" and optional "config" and "schema_version"
                # sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    class_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                if (mgr_config := manager_config_dict.pop("config", None)) is not None:
                    configs[manager] = mgr_config
                if (mgr_version := manager_config_dict.pop("schema_version", None)) is not None:
                    # Note that we do not check versions that come from config
                    # for compatibility, they may be overridden later by
                    # versions from registry.
                    schema_versions[manager] = VersionTuple.fromString(mgr_version)
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            elif isinstance(manager_config, str):
                class_name = manager_config
            elif manager_config is None:
                # Some managers may be optional.
                continue
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore need special care because it's the only manager which can be
        # None, and we cannot define default value for it.
        if "obscore" in manager_types:
            return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)
        else:
            return cls(
                **manager_types, obscore=None, manager_configs=configs, schema_versions=schema_versions
            )

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses integer dataset IDs, or if
            the dimension configuration cannot be serialized to JSON.
        """
        # If schema versions were specified in the config, check that they are
        # compatible with their managers.
        managers = self.as_dict()
        for manager_type, schema_version in self.schema_versions.items():
            manager_class = managers[manager_type]
            manager_class.checkNewSchemaVersion(schema_version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() is sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # store managers and their versions in attributes table
        versions = ButlerVersionsManager(instances.attributes)
        versions.storeManagersConfig(instances.as_dict())

        # dump universe config as json into attributes (faster than YAML)
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)
        if instances.obscore is not None:
            obscore_json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, obscore_json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration is missing from the
            attributes table.

        Notes
        -----
        This method updates ``self.schema_versions`` in place with the
        versions read from the database and, when an obscore configuration is
        found in the attributes table, replaces
        ``self.manager_configs["obscore"]`` with it.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration. Note that we do not check this
        # manager version before initializing it, it is supposed to be
        # completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Verify that configured classes are compatible with the ones stored
        # in registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Read schema versions from registry and validate them.
        self.schema_versions.update(versions.managerVersions())
        for manager_type, manager_class in self.as_dict().items():
            schema_version = self.schema_versions.get(manager_type)
            if schema_version is not None:
                manager_class.checkCompatibility(schema_version, database.isWriteable())

        # get serialized as a string from database
        dimensionsString = attributes.get(_DIMENSIONS_ATTR)
        if dimensionsString is None:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        universe = DimensionUniverse(dimensionConfig)
        if self.obscore is not None:
            # Get ObsCore configuration from attributes table, this silently
            # overrides whatever may come from config file. Idea is that we do
            # not want to carry around the whole thing, and butler config will
            # have empty obscore configuration after initialization. When
            # configuration is missing from attributes table, the obscore table
            # does not exist, and we do not instantiate obscore manager.
            obscoreString = attributes.get(_OBSCORE_ATTR)
            if obscoreString is not None:
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `type` \
                [`VersionedExtension`]]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager class.  Only existing managers are returned, i.e. fields
            whose value is `None` are skipped.
        """
        return {
            name: manager_class
            for name in self._manager_names()
            if (manager_class := getattr(self, name)) is not None
        }

329 

330 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct holding the manager instances that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build manager instances from manager types and a database
        connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        placeholder_table = ddl.TableSpec(fields=())
        versions = types.schema_versions

        # Managers are created in dependency order: later ones receive the
        # earlier instances as arguments.
        attributes = types.attributes.initialize(
            database, context, registry_schema_version=versions.get("attributes")
        )
        dimensions = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=versions.get("dimensions")
        )
        collections = types.collections.initialize(
            database,
            context,
            dimensions=dimensions,
            registry_schema_version=versions.get("collections"),
        )
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
            registry_schema_version=versions.get("datasets"),
        )
        opaque = types.opaque.initialize(database, context, registry_schema_version=versions.get("opaque"))
        # Note that the datastore bridge (and obscore below) is handed the
        # datasets manager *type*, not the instance created above.
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=versions.get("datastores"),
        )

        # The obscore manager is only instantiated when both its type and its
        # configuration are available.
        obscore = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions,
                registry_schema_version=versions.get("obscore"),
            )

        column_types = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                placeholder_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(
                placeholder_table, primaryKey=False, nullable=False
            ),
            ingest_date_dtype=datasets.ingest_date_dtype(),
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            obscore=obscore,
            column_types=column_types,
        )

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return contained managers as a dictionary keyed by manager type
        name.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager instance.  The ``column_types`` field and any manager
            that is `None` are left out.
        """
        return {
            field.name: manager
            for field in dataclasses.fields(self)
            if field.name != "column_types" and (manager := getattr(self, field.name)) is not None
        }

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database or clearing
        caches.
        """
        self.dimensions.clearCaches()
        self.collections.refresh()
        self.datasets.refresh()

461 self.collections.refresh() 

462 self.datasets.refresh()