Coverage for python/lsst/daf/butler/registry/managers.py: 31%

148 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-22 02:18 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "RegistryManagerInstances", 

26 "RegistryManagerTypes", 

27) 

28 

29import dataclasses 

30import logging 

31from collections.abc import Mapping 

32from typing import Any, Generic, Optional, Type, TypeVar 

33 

34import sqlalchemy 

35from lsst.utils import doImportType 

36 

37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl 

38from ._config import RegistryConfig 

39from .interfaces import ( 

40 ButlerAttributeManager, 

41 CollectionManager, 

42 Database, 

43 DatasetRecordStorageManager, 

44 DatastoreRegistryBridgeManager, 

45 DimensionRecordStorageManager, 

46 ObsCoreTableManager, 

47 OpaqueTableStorageManager, 

48 StaticTablesContext, 

49 VersionedExtension, 

50 VersionTuple, 

51) 

52from .versions import ButlerVersionsManager 

53 

# Module-level logger.
_LOG = logging.getLogger(__name__)

# Keys under which serialized configurations are kept in the attributes
# table: the dimensions configuration and the obscore configuration.
_DIMENSIONS_ATTR = "config:dimensions.json"
_OBSCORE_ATTR = "config:obscore.json"

# Type parameters of `_GenericRegistryManagers`, one per manager slot. They
# are bound either to manager classes or to manager instances by the
# non-generic subclasses.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")

70 

71 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic base struct with one slot per manager that backs a `Registry`.

    The type parameters let the same set of slots hold either manager classes
    or manager instances. Do not use this class directly; use one of its
    non-generic subclasses, `RegistryManagerInstances` or
    `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager of flat key-value pairs; versions are stored through it too.
    """

    dimensions: _Dimensions
    """Manager of dimensions.
    """

    collections: _Collections
    """Manager of collections.
    """

    datasets: _Datasets
    """Manager of datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager of tables that are opaque to the Registry.
    """

    datastores: _Datastores
    """Manager of the interface between `Registry` and `Datastore`.
    """

    obscore: Optional[_ObsCore]
    """Manager of `ObsCore` table(s); this is the only slot that may be `None`.
    """

110 

111 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """Struct bundling the manager classes (not instances) that back a
    `Registry`.
    """

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Configuration options of individual managers, keyed by manager name,
    passed to their initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Optional schema versions of individual managers, keyed by manager name,
    as defined by configuration.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build the struct by reading manager class names from configuration
        and importing the named classes.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section. Each entry is
            either a fully-qualified class name, or a sub-configuration with
            a mandatory "cls" key and optional "config" and "schema_version"
            keys.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager sub-configuration has no "cls" key, or if a
            manager entry is neither a string, a `Config`, nor `None`.
        ValueError
            Raised if a manager sub-configuration contains keys other than
            "cls", "config" and "schema_version".
        """
        # Only names that are fields of this class are looked up in the
        # configuration.
        # TODO: Maybe we need to check keys for unknown names/typos?
        non_manager_fields = {"manager_configs", "schema_versions"}
        manager_names = {
            field.name for field in dataclasses.fields(cls) if field.name not in non_manager_fields
        }

        # Contents of the "config" sub-keys, if any, indexed by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, Optional[Type]] = {}
        for name in manager_names:
            entry = config["managers"].get(name)
            if entry is None:
                # Some managers may be optional.
                continue
            if isinstance(entry, str):
                class_name = entry
            elif isinstance(entry, Config):
                # Expect "cls" and optional "config" and "schema_version"
                # sub-keys; whatever is left after popping those is an error.
                options = entry.toDict()
                try:
                    class_name = options.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {name!r} manager configuration") from None
                sub_config = options.pop("config", None)
                if sub_config is not None:
                    configs[name] = sub_config
                version_string = options.pop("schema_version", None)
                if version_string is not None:
                    # Versions coming from config are not checked for
                    # compatibility here, they may be overridden later by
                    # versions from registry.
                    schema_versions[name] = VersionTuple.fromString(version_string)
                if options:
                    raise ValueError(f"{name!r} manager configuration has unexpected keys: {set(options)}")
            else:
                raise KeyError(f"Unexpected type of {name!r} manager configuration: {entry!r}")
            manager_types[name] = doImportType(class_name)

        # obscore is the only manager that can be None, and no default value
        # can be defined for its field, so fill it in explicitly when it was
        # not configured.
        manager_types.setdefault("obscore", None)
        return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository and return the corresponding manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository. Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses integer dataset IDs, or if
            the dimension configuration cannot be serialized to JSON.
        """
        # Schema versions given in the config must be compatible with the
        # manager classes they refer to.
        manager_classes = self.as_dict()
        for name, version in self.schema_versions.items():
            manager_classes[name].checkNewSchemaVersion(version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Record managers and their versions in the attributes table.
        ButlerVersionsManager(instances.attributes).storeManagersConfig(instances.as_dict())

        # The universe config is stored as JSON in attributes (faster than
        # YAML).
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)

        if instances.obscore is not None:
            obscore_json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, obscore_json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Schema versions read from the registry are merged into
        ``self.schema_versions``, and an obscore configuration found in the
        attributes table replaces ``self.manager_configs["obscore"]``.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository. Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Only the attributes manager is created at first, because it is
        # needed to load the embedded dimensions configuration. Its version
        # is not checked before initializing it; it is supposed to be
        # completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Configured classes must be compatible with the ones stored in
        # registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Take schema versions from registry and validate them.
        self.schema_versions.update(versions.managerVersions())
        for name, manager_class in self.as_dict().items():
            if (version := self.schema_versions.get(name)) is None:
                continue
            manager_class.checkCompatibility(version, database.isWriteable())

        # The dimensions configuration is stored in the database as a string.
        dimensions_string = attributes.get(_DIMENSIONS_ATTR)
        if dimensions_string is None:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        dimension_config = DimensionConfig(Config.fromString(dimensions_string, format="json"))
        universe = DimensionUniverse(dimension_config)

        if self.obscore is not None:
            # ObsCore configuration comes from the attributes table and
            # silently overrides whatever may come from the config file. Idea
            # is that we do not want to carry around the whole thing, and
            # butler config will have empty obscore configuration after
            # initialization. When configuration is missing from the
            # attributes table, the obscore table does not exist, and the
            # obscore manager is not instantiated.
            obscore_string = attributes.get(_OBSCORE_ATTR)
            if obscore_string is not None:
                self.manager_configs["obscore"] = Config.fromString(obscore_string, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return the contained manager classes keyed by manager type name.

        Returns
        -------
        extensions : `Mapping` [`str`, `type` [`VersionedExtension`]]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager class. Only existing managers are returned.
        """
        non_manager_fields = ("manager_configs", "schema_versions")
        manager_classes: dict[str, type[VersionedExtension]] = {}
        for field in dataclasses.fields(self):
            if field.name in non_manager_fields:
                continue
            manager_class = getattr(self, field.name)
            if manager_class is not None:
                manager_classes[field.name] = manager_class
        return manager_classes

323 

324 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct bundling the manager instances that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Instantiate every manager from its type using an existing database
        connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        dummy_table = ddl.TableSpec(fields=())
        versions = types.schema_versions

        # Managers are initialized in a fixed order because later ones
        # receive earlier ones as arguments.
        attributes_manager = types.attributes.initialize(
            database, context, registry_schema_version=versions.get("attributes")
        )
        dimensions_manager = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=versions.get("dimensions")
        )
        collections_manager = types.collections.initialize(
            database,
            context,
            dimensions=dimensions_manager,
            registry_schema_version=versions.get("collections"),
        )
        datasets_manager = types.datasets.initialize(
            database,
            context,
            collections=collections_manager,
            dimensions=dimensions_manager,
            registry_schema_version=versions.get("datasets"),
        )
        opaque_manager = types.opaque.initialize(
            database, context, registry_schema_version=versions.get("opaque")
        )
        datastores_manager = types.datastores.initialize(
            database,
            context,
            opaque=opaque_manager,
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=versions.get("datastores"),
        )

        # The obscore manager is created only when both its class and its
        # configuration are available.
        obscore_manager = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore_manager = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions_manager,
                registry_schema_version=versions.get("obscore"),
            )

        column_types = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
            ingest_date_dtype=datasets_manager.ingest_date_dtype(),
        )

        field_values: dict[str, Any] = {
            "attributes": attributes_manager,
            "dimensions": dimensions_manager,
            "collections": collections_manager,
            "datasets": datasets_manager,
            "opaque": opaque_manager,
            "datastores": datastores_manager,
            "obscore": obscore_manager,
            "column_types": column_types,
        }
        return cls(**field_values)

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return the contained manager instances keyed by manager type name.

        Returns
        -------
        extensions : `Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager instance. Only existing managers are returned.
        """
        managers: dict[str, VersionedExtension] = {}
        for field in dataclasses.fields(self):
            # ``column_types`` is the only field that is not a manager.
            if field.name == "column_types":
                continue
            manager = getattr(self, field.name)
            if manager is not None:
                managers[field.name] = manager
        return managers

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database or clearing
        caches.
        """
        self.dimensions.clearCaches()
        self.collections.refresh()
        self.datasets.refresh()