Coverage for python/lsst/daf/butler/registry/managers.py: 33%

151 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

# Names exported by ``from ... import *``; only the two concrete structs are
# public, the generic base class is an implementation detail.
__all__ = (
    "RegistryManagerInstances",
    "RegistryManagerTypes",
)

36 

37import dataclasses 

38import logging 

39from collections.abc import Mapping 

40from typing import Any, Generic, TypeVar 

41 

42import sqlalchemy 

43from lsst.utils import doImportType 

44 

45from .._column_type_info import ColumnTypeInfo 

46from .._config import Config 

47from ..dimensions import DimensionConfig, DimensionUniverse 

48from ._config import RegistryConfig 

49from .interfaces import ( 

50 ButlerAttributeManager, 

51 CollectionManager, 

52 Database, 

53 DatasetRecordStorageManager, 

54 DatastoreRegistryBridgeManager, 

55 DimensionRecordStorageManager, 

56 ObsCoreTableManager, 

57 OpaqueTableStorageManager, 

58 StaticTablesContext, 

59 VersionedExtension, 

60 VersionTuple, 

61) 

62from .versions import ButlerVersionsManager 

63 

# Type variables used to parametrize `_GenericRegistryManagers`, one per
# manager attribute of that struct.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


_LOG = logging.getLogger(__name__)

# key for dimensions configuration in attributes table
_DIMENSIONS_ATTR = "config:dimensions.json"

# key for obscore configuration in attributes table
_OBSCORE_ATTR = "config:obscore.json"

80 

81 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Base struct used to pass around the manager instances or types that back
    a `Registry`.

    This class should only be used via its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.

    Notes
    -----
    The dataclass is frozen, so attributes cannot be rebound after
    construction. Field order matters: subclasses append their own fields
    after the ones declared here.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions.
    """

    dimensions: _Dimensions
    """Manager for dimensions.
    """

    collections: _Collections
    """Manager for collections.
    """

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables.
    """

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`.
    """

    obscore: _ObsCore | None
    """Manager for `ObsCore` table(s); the only manager that may be `None`.
    """

120 

121 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        type[ButlerAttributeManager],
        type[DimensionRecordStorageManager],
        type[CollectionManager],
        type[DatasetRecordStorageManager],
        type[OpaqueTableStorageManager],
        type[DatastoreRegistryBridgeManager],
        type[ObsCoreTableManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager configuration is a `Config` without a "cls"
            key, or if a manager configuration is neither a `Config`, a `str`
            nor `None`.
        ValueError
            Raised if a manager configuration has keys other than "cls",
            "config" and "schema_version".
        """
        # We only check for manager names defined in class attributes.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers = {field.name for field in dataclasses.fields(cls)} - {"manager_configs", "schema_versions"}
        # Values of "config" sub-key, if any, indexed by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, type] = {}
        for manager in managers:
            manager_config = config["managers"].get(manager)
            if isinstance(manager_config, Config):
                # Expect "cls" and optional "config" and "schema_version"
                # sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    class_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                if (mgr_config := manager_config_dict.pop("config", None)) is not None:
                    configs[manager] = mgr_config
                if (mgr_version := manager_config_dict.pop("schema_version", None)) is not None:
                    # Note that we do not check versions that come from config
                    # for compatibility, they may be overridden later by
                    # versions from registry.
                    schema_versions[manager] = VersionTuple.fromString(mgr_version)
                # Anything left after popping the known keys is unexpected.
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            elif isinstance(manager_config, str):
                # Short form: the value is the class name itself.
                class_name = manager_config
            elif manager_config is None:
                # Some managers may be optional.
                continue
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore need special care because it's the only manager which can be
        # None, and we cannot define default value for it.
        if "obscore" in manager_types:
            return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)
        else:
            return cls(
                **manager_types, obscore=None, manager_configs=configs, schema_versions=schema_versions
            )

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses `sqlalchemy.BigInteger` as its
            ID column type, or if ``dimensionConfig`` cannot be serialized to
            JSON.
        """
        # If schema versions were specified in the config, check that they are
        # compatible with their managers.
        managers = self.as_dict()
        for manager_type, schema_version in self.schema_versions.items():
            manager_class = managers[manager_type]
            manager_class.checkNewSchemaVersion(schema_version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() is sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # store managers and their versions in attributes table
        versions = ButlerVersionsManager(instances.attributes)
        versions.storeManagersConfig(instances.as_dict())

        # dump universe config as json into attributes (faster than YAML)
        json = dimensionConfig.dump(format="json")
        if json is not None:
            instances.attributes.set(_DIMENSIONS_ATTR, json)
        else:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        if instances.obscore is not None:
            json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the attributes table.

        Notes
        -----
        This method updates ``self.schema_versions`` in place with the
        versions read from the registry, and may replace
        ``self.manager_configs["obscore"]`` with the configuration stored in
        the attributes table.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration. Note that we do not check this
        # manager version before initializing it, it is supposed to be
        # completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Verify that configured classes are compatible with the ones stored
        # in registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Read schema versions from registry and validate them.
        self.schema_versions.update(versions.managerVersions())
        for manager_type, manager_class in self.as_dict().items():
            schema_version = self.schema_versions.get(manager_type)
            if schema_version is not None:
                manager_class.checkCompatibility(schema_version, database.isWriteable())

        # get serialized as a string from database
        dimensionsString = attributes.get(_DIMENSIONS_ATTR)
        if dimensionsString is not None:
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        else:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        universe = DimensionUniverse(dimensionConfig)
        if self.obscore is not None:
            # Get ObsCore configuration from attributes table, this silently
            # overrides whatever may come from config file. Idea is that we do
            # not want to carry around the whole thing, and butler config will
            # have empty obscore configuration after initialization. When
            # configuration is missing from attributes table, the obscore table
            # does not exist, and we do not instantiate obscore manager.
            obscoreString = attributes.get(_OBSCORE_ATTR)
            if obscoreString is not None:
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, \
                `type` [`VersionedExtension`]]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager class. Only existing managers are returned.
        """
        # These two fields hold per-manager settings, not managers.
        extras = {"manager_configs", "schema_versions"}
        managers = {f.name: getattr(self, f.name) for f in dataclasses.fields(self) if f.name not in extras}
        # Drop managers that are not configured (value is `None`).
        return {key: value for key, value in managers.items() if value is not None}

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Per-manager schema versions defined by configuration, optional."""

333 

334 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.  Its ``obscore`` attribute is
            `None` unless ``types.obscore`` is set and ``types.manager_configs``
            has an "obscore" entry.
        """
        # Empty table specification, used only to obtain the field specs
        # returned by the addDatasetForeignKey / addRunForeignKey calls below.
        dummy_table = ddl.TableSpec(fields=())
        kwargs: dict[str, Any] = {}
        schema_versions = types.schema_versions
        # Managers are created in this order because later ones take earlier
        # ones as arguments.
        kwargs["attributes"] = types.attributes.initialize(
            database, context, registry_schema_version=schema_versions.get("attributes")
        )
        kwargs["dimensions"] = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=schema_versions.get("dimensions")
        )
        kwargs["collections"] = types.collections.initialize(
            database,
            context,
            dimensions=kwargs["dimensions"],
            registry_schema_version=schema_versions.get("collections"),
        )
        datasets = types.datasets.initialize(
            database,
            context,
            collections=kwargs["collections"],
            dimensions=kwargs["dimensions"],
            registry_schema_version=schema_versions.get("datasets"),
        )
        kwargs["datasets"] = datasets
        kwargs["opaque"] = types.opaque.initialize(
            database, context, registry_schema_version=schema_versions.get("opaque")
        )
        # Note: the datasets manager *type* (not the instance created above)
        # is what gets passed as ``datasets`` here and to obscore below.
        kwargs["datastores"] = types.datastores.initialize(
            database,
            context,
            opaque=kwargs["opaque"],
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=schema_versions.get("datastores"),
        )
        if types.obscore is not None and "obscore" in types.manager_configs:
            kwargs["obscore"] = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=kwargs["dimensions"],
                registry_schema_version=schema_versions.get("obscore"),
            )
        else:
            kwargs["obscore"] = None
        kwargs["column_types"] = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
            ingest_date_dtype=datasets.ingest_date_dtype(),
        )
        return cls(**kwargs)

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager instance. Only existing managers are returned.
        """
        # ``column_types`` is the only field that is not a manager.
        instances = {
            f.name: getattr(self, f.name) for f in dataclasses.fields(self) if f.name != "column_types"
        }
        # Drop managers that were not instantiated (value is `None`).
        return {key: value for key, value in instances.items() if value is not None}

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database or clearing
        caches.
        """
        self.dimensions.clearCaches()
        self.collections.refresh()
        self.datasets.refresh()