
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "SqlRegistry",
)

from collections import defaultdict
import contextlib
import logging
from typing import (
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    TYPE_CHECKING,
    Union,
)

import sqlalchemy

from ..core import (
    ButlerURI,
    Config,
    DataCoordinate,
    DataCoordinateIterable,
    DataId,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    ddl,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
)
from . import queries
from ..core.utils import iterable, transactional
from ._config import RegistryConfig
from ._collectionType import CollectionType
from ._defaults import RegistryDefaults
from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError
from .managers import RegistryManagerTypes, RegistryManagerInstances
from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis
from .summaries import CollectionSummary
from .interfaces import ChainedCollectionRecord, RunRecord
from ._registry import Registry


if TYPE_CHECKING:

    from .._butlerConfig import ButlerConfig
    from .interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """


    @classmethod
    def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None,
                         dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
                         butlerRoot: Optional[str] = None) -> Registry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : `str`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
                                         namespace=config.get("namespace"))
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)
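
    # A hedged usage sketch (not part of this module): create a new repository
    # backed by an in-memory SQLite database. The "db" configuration key and
    # the SQLite URI are assumptions for illustration only.
    #
    #     from lsst.daf.butler.registry import RegistryConfig
    #
    #     config = RegistryConfig()
    #     config["db"] = "sqlite:///:memory:"
    #     registry = SqlRegistry.createFromConfig(config)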


    @classmethod
    def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str],
                   butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True,
                   defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create `Registry` subclass instance from `config`.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `str` or `ButlerURI`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
                                         namespace=config.get("namespace"), writeable=writeable)
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)
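
    # A hedged usage sketch (not part of this module): open an existing
    # repository read-only with a default collection search path. The
    # repository path and collection name are assumptions for illustration.
    #
    #     defaults = RegistryDefaults(collections=["HSC/defaults"])
    #     registry = SqlRegistry.fromConfig("/repo/butler.yaml", writeable=False,
    #                                       defaults=defaults)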


    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise
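
    # A hedged usage sketch (not part of this module): group several registry
    # writes so that they succeed or fail together. The collection, dataset
    # type, and data ID values are assumptions for illustration.
    #
    #     with registry.transaction():
    #         registry.registerRun("u/example/run")
    #         registry.insertDatasets("raw",
    #                                 [{"instrument": "HSC", "exposure": 903334, "detector": 42}],
    #                                 run="u/example/run")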


    def resetConnectionPool(self) -> None:
        """Reset the SQLAlchemy connection pool for the `SqlRegistry` database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary, make
        sure that no connections are active (no session or transaction is in
        progress) before forking, and call this method in the child process
        immediately after the fork to reset the connection pool.
        """
        self._db._engine.dispose()
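
    # A hedged usage sketch (not part of this module): reset the pool in each
    # worker when forking. The "fork" start method and the worker layout are
    # assumptions for illustration.
    #
    #     import multiprocessing
    #
    #     def worker(registry):
    #         registry.resetConnectionPool()
    #         ...  # safe to query the registry from here on
    #
    #     ctx = multiprocessing.get_context("fork")
    #     proc = ctx.Process(target=worker, args=(registry,))
    #     proc.start()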


    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved via
        `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that represents
            a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)
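
    # A hedged usage sketch (not part of this module): a datastore-style
    # client could register and use an opaque table roughly like this. The
    # table name, field specification details, and values are assumptions
    # for illustration.
    #
    #     spec = ddl.TableSpec(fields=[
    #         ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
    #         ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    #     ])
    #     registry.registerOpaqueTable("datastore_records", spec)
    #     registry.insertOpaqueData("datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
    #     rows = list(registry.fetchOpaqueData("datastore_records", dataset_id=1))
    #     registry.deleteOpaqueData("datastore_records", dataset_id=1)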


    def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED,
                           doc: Optional[str] = None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.register(name, type, doc=doc)

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: Optional[str] = None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.register(name, CollectionType.RUN, doc=doc)

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> CollectionSearch:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionSearch.fromExpression(children)
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)
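
    # A hedged usage sketch (not part of this module): build a CHAINED
    # collection that searches a RUN and a TAGGED collection in order. The
    # collection names are assumptions for illustration.
    #
    #     registry.registerRun("HSC/runs/example")
    #     registry.registerCollection("HSC/tagged", CollectionType.TAGGED)
    #     registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain("HSC/defaults", ["HSC/runs/example", "HSC/tagged"])
    #     print(list(registry.getCollectionChain("HSC/defaults")))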


    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.datasets.remove(name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets[name].datasetType

    def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
                    collections: Any = None, timespan: Optional[Timespan] = None,
                    **kwargs: Any) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets[datasetType.name]
        else:
            storage = self._managers.datasets[datasetType]
        dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions,
                                            universe=self.dimensions, defaults=self.defaults.dataId,
                                            **kwargs)
        if collections is None:
            if not self.defaults.collections:
                raise TypeError("No collections provided to findDataset, "
                                "and no defaults from registry construction.")
            collections = self.defaults.collections
        else:
            collections = CollectionSearch.fromExpression(collections)
        for collectionRecord in collections.iter(self._managers.collections):
            if (collectionRecord.type is CollectionType.CALIBRATION
                    and (not storage.datasetType.isCalibration() or timespan is None)):
                continue
            result = storage.find(collectionRecord, dataId, timespan=timespan)
            if result is not None:
                return result

        return None
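
    # A hedged usage sketch (not part of this module): look up a single
    # dataset by dataset type and data ID. The dataset type, dimension
    # values, and collection name are assumptions for illustration.
    #
    #     ref = registry.findDataset("calexp", instrument="HSC", visit=903334,
    #                                detector=42, collections="HSC/runs/example")
    #     if ref is not None:
    #         print(ref.dataId, ref.run)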


    @transactional
    def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
                       run: Optional[str] = None, expand: bool = True) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise TypeError("No run provided to insertDatasets, "
                                "and no default from registry construction.")
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.")
        assert isinstance(runRecord, RunRecord)
        progress = Progress("lsst.daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                               for dataId in progress.wrap(dataIds,
                                                           f"Expanding {storage.datasetType.name} data IDs")]
        else:
            expandedDataIds = [DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions)
                               for dataId in dataIds]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds))
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting "
                                             f"one or more datasets of type {storage.datasetType} into "
                                             f"collection '{run}'. "
                                             f"This probably means a dataset with the same data ID "
                                             f"and dataset type already exists, but it may also mean a "
                                             f"dimension row is missing.") from err
        return refs
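
    # A hedged usage sketch (not part of this module): register a dataset type
    # and insert a dataset into a RUN collection. The dataset type name,
    # dimensions, storage class, and data ID values are assumptions for
    # illustration.
    #
    #     datasetType = DatasetType("calexp", dimensions=["instrument", "visit", "detector"],
    #                               storageClass="ExposureF", universe=registry.dimensions)
    #     registry.registerDatasetType(datasetType)
    #     registry.registerRun("HSC/runs/example")
    #     (ref,) = registry.insertDatasets(datasetType,
    #                                      [{"instrument": "HSC", "visit": 903334, "detector": 42}],
    #                                      run="HSC/runs/example")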


    def getDataset(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(),
                                                                  desc="Removing datasets by type"):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError("One or more datasets are still "
                                          "present in one or more Datastores.") from err


    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.")
        for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(),
                                                                  desc="Associating datasets by type"):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    f"dataset type and data ID already exist in the collection, but it may also indicate "
                    f"that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; "
                            "expected TAGGED.")
        for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(),
                                                                  desc="Disassociating datasets by type"):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(),
                                                                  desc="Certifying datasets by type"):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(collectionRecord, refsForType, timespan)

    @transactional
    def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *,
                  dataIds: Optional[Iterable[DataId]] = None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [DataCoordinate.standardize(d, graph=storage.datasetType.dimensions)
                                   for d in dataIds]
        storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds)
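
    # A hedged usage sketch (not part of this module): certify calibration
    # datasets into a CALIBRATION collection over a validity range. The
    # collection name, refs, and astropy-based timespan are assumptions for
    # illustration.
    #
    #     from astropy.time import Time
    #
    #     registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
    #     validity = Timespan(Time("2021-01-01", scale="tai"), Time("2021-06-01", scale="tai"))
    #     registry.certify("HSC/calib", refs, validity)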


    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
                     records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
                     withDefaults: bool = True,
                     **kwargs: Any) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions,
                                                  defaults=defaults, **kwargs)
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise LookupError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    dataIdSet = DataCoordinateIterable.fromScalar(
                        DataCoordinate.standardize(keys, graph=element.graph)
                    )
                    fetched = tuple(storage.fetch(dataIdSet))
                    try:
                        (record,) = fetched
                    except ValueError:
                        record = None
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise LookupError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
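
    # A hedged usage sketch (not part of this module): expand a minimal data
    # ID so that dimension records are attached. The instrument and detector
    # values, and the record access shown, are assumptions for illustration.
    #
    #     dataId = registry.expandDataId(instrument="HSC", detector=42)
    #     print(dataId.records["detector"])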


    def insertDimensionData(self, element: Union[DimensionElement, str],
                            *data: Union[Mapping[str, Any], DimensionRecord],
                            conform: bool = True) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
                       for row in data]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]  # type: ignore
        storage.insert(*records)

    def syncDimensionData(self, element: Union[DimensionElement, str],
                          row: Union[Mapping[str, Any], DimensionRecord],
                          conform: bool = True) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]  # type: ignore
        return storage.sync(record)
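
    # A hedged usage sketch (not part of this module): insert dimension
    # records for an instrument and one of its detectors. The record field
    # names follow the default dimension universe but are assumptions for
    # illustration.
    #
    #     registry.insertDimensionData("instrument", {"name": "HSC", "detector_max": 200})
    #     registry.insertDimensionData("detector", {"instrument": "HSC", "id": 42,
    #                                               "full_name": "1_53"})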


    def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
                          ) -> Iterator[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            for datasetType in self._managers.datasets:
                # The dataset type can no longer be a component
                yield datasetType
                if components:
                    # Automatically create the component dataset types
                    try:
                        componentsForDatasetType = datasetType.makeAllComponentDatasetTypes()
                    except KeyError as err:
                        _LOG.warning(f"Could not load storage class {err} for {datasetType.name}; "
                                     "if it has components they will not be included in query results.")
                    else:
                        yield from componentsForDatasetType
            return
        done: Set[str] = set()
        for name in wildcard.strings:
            storage = self._managers.datasets.find(name)
            if storage is not None:
                done.add(storage.datasetType.name)
                yield storage.datasetType
        if wildcard.patterns:
            # If components (the argument) is None, we'll save component
            # dataset types that we might want to match, but only if their
            # parents didn't get included.
            componentsForLater = []
            for registeredDatasetType in self._managers.datasets:
                # Components are not stored in registry so expand them here
                allDatasetTypes = [registeredDatasetType]
                try:
                    allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes())
                except KeyError as err:
                    _LOG.warning(f"Could not load storage class {err} for {registeredDatasetType.name}; "
                                 "if it has components they will not be included in query results.")
                for datasetType in allDatasetTypes:
                    if datasetType.name in done:
                        continue
                    parentName, componentName = datasetType.nameAndComponent()
                    if componentName is not None and not components:
                        if components is None and parentName not in done:
                            componentsForLater.append(datasetType)
                        continue
                    if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                        done.add(datasetType.name)
                        yield datasetType
            # Go back and try to match saved components.
            for datasetType in componentsForLater:
                parentName, _ = datasetType.nameAndComponent()
                if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                    yield datasetType


    def queryCollections(self, expression: Any = ...,
                         datasetType: Optional[DatasetType] = None,
                         collectionTypes: Iterable[CollectionType] = CollectionType.all(),
                         flattenChains: bool = False,
                         includeChains: Optional[bool] = None) -> Iterator[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        query = CollectionQuery.fromExpression(expression)
        for record in query.iter(self._managers.collections, collectionTypes=frozenset(collectionTypes),
                                 flattenChains=flattenChains, includeChains=includeChains):
            yield record.name

    def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        return queries.QueryBuilder(
            summary,
            queries.RegistryManagers(
                collections=self._managers.collections,
                dimensions=self._managers.dimensions,
                datasets=self._managers.datasets,
                TimespanReprClass=self._db.getTimespanRepresentation(),
            ),
        )


    def queryDatasets(self, datasetType: Any, *,
                      collections: Any = None,
                      dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
                      dataId: Optional[DataId] = None,
                      where: Optional[str] = None,
                      findFirst: bool = False,
                      components: Optional[bool] = None,
                      bind: Optional[Mapping[str, Any]] = None,
                      check: bool = True,
                      **kwargs: Any) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Standardize the collections expression.
        if collections is None:
            if not self.defaults.collections:
                raise TypeError("No collections provided to queryDatasets, "
                                "and no defaults from registry construction.")
            collections = self.defaults.collections
        elif findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        # Standardize and expand the data ID provided as a constraint.
        standardizedDataId = self.expandDataId(dataId, **kwargs)

        # We can only query directly if given a non-component DatasetType
        # instance. If we were given an expression or str or a component
        # DatasetType instance, we'll populate this dict, recurse, and return.
        # If we already have a non-component DatasetType, it will remain None
        # and we'll run the query directly.
        composition: Optional[
            Dict[
                DatasetType,  # parent dataset type
                List[Optional[str]]  # component name, or None for parent
            ]
        ] = None
        if not isinstance(datasetType, DatasetType):
            # We were given a dataset type expression (which may be as simple
            # as a str). Loop over all matching datasets, delegating handling
            # of the `components` argument to queryDatasetTypes, as we populate
            # the composition dict.
            composition = defaultdict(list)
            for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
                parentName, componentName = trueDatasetType.nameAndComponent()
                if componentName is not None:
                    parentDatasetType = self.getDatasetType(parentName)
                    composition.setdefault(parentDatasetType, []).append(componentName)
                else:
                    composition.setdefault(trueDatasetType, []).append(None)
        elif datasetType.isComponent():
            # We were given a true DatasetType instance, but it's a component.
            # The composition dict will have exactly one item.
            parentName, componentName = datasetType.nameAndComponent()
            parentDatasetType = self.getDatasetType(parentName)
            composition = {parentDatasetType: [componentName]}
        if composition is not None:
            # We need to recurse. Do that once for each parent dataset type.
            chain = []
            for parentDatasetType, componentNames in composition.items():
                parentResults = self.queryDatasets(
                    parentDatasetType,
                    collections=collections,
                    dimensions=dimensions,
                    dataId=standardizedDataId,
                    where=where,
                    findFirst=findFirst,
                    check=check,
                )
                if isinstance(parentResults, queries.ParentDatasetQueryResults):
                    chain.append(
                        parentResults.withComponents(componentNames)
                    )
                else:
                    # Should only happen if we know there would be no results.
                    assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \
                        and not parentResults._chain
            return queries.ChainedDatasetQueryResults(chain)
        # If we get here, there's no need to recurse (or we are already
        # recursing; there can only ever be one level of recursion).

        # The full set of dimensions in the query is the combination of those
        # needed for the DatasetType and those explicitly requested, if any.
        requestedDimensionNames = set(datasetType.dimensions.names)
        if dimensions is not None:
            requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
        # Construct the summary structure needed to construct a QueryBuilder.
        summary = queries.QuerySummary(
            requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
            dataId=standardizedDataId,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
        )
        builder = self.makeQueryBuilder(summary)
        # Add the dataset subquery to the query, telling the QueryBuilder to
        # include the rank of the selected collection in the results only if we
        # need to findFirst. Note that if any of the collections are
        # actually wildcard expressions, and we've asked for deduplication,
        # this will raise TypeError for us.
        if not builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst):
            return queries.ChainedDatasetQueryResults(())
        query = builder.finish()
        return queries.ParentDatasetQueryResults(self._db, query, components=[None])
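
    # A hedged usage sketch (not part of this module): query datasets with a
    # user expression and a bound parameter. The dataset type, collection
    # names, and dimension values are assumptions for illustration.
    #
    #     refs = registry.queryDatasets("calexp",
    #                                   collections=["HSC/runs/example"],
    #                                   where="instrument = 'HSC' AND visit = myvisit",
    #                                   bind={"myvisit": 903334},
    #                                   findFirst=True)
    #     for ref in refs:
    #         print(ref.datasetType.name, ref.dataId)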


    def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
                     dataId: Optional[DataId] = None,
                     datasets: Any = None,
                     collections: Any = None,
                     where: Optional[str] = None,
                     components: Optional[bool] = None,
                     bind: Optional[Mapping[str, Any]] = None,
                     check: bool = True,
                     **kwargs: Any) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = iterable(dimensions)
        standardizedDataId = self.expandDataId(dataId, **kwargs)
        standardizedDatasetTypes = set()
        requestedDimensions = self.dimensions.extract(dimensions)
        queryDimensionNames = set(requestedDimensions.names)
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise TypeError("Cannot pass 'datasets' without 'collections'.")
                collections = self.defaults.collections
            else:
                # Preprocess collections expression in case the original
                # included single-pass iterators (we'll want to use it multiple
                # times below).
                collections = CollectionQuery.fromExpression(collections)
            for datasetType in self.queryDatasetTypes(datasets, components=components):
                queryDimensionNames.update(datasetType.dimensions.names)
                # If any matched dataset type is a component, just operate on
                # its parent instead, because Registry doesn't know anything
                # about what components exist, and here (unlike queryDatasets)
                # we don't care about returning them.
                parentDatasetTypeName, componentName = datasetType.nameAndComponent()
                if componentName is not None:
                    datasetType = self.getDatasetType(parentDatasetTypeName)
                standardizedDatasetTypes.add(datasetType)

        summary = queries.QuerySummary(
            requested=DimensionGraph(self.dimensions, names=queryDimensionNames),
            dataId=standardizedDataId,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
        )
        builder = self.makeQueryBuilder(summary)
        for datasetType in standardizedDatasetTypes:
            builder.joinDataset(datasetType, collections, isResult=False)
        query = builder.finish()
        return queries.DataCoordinateQueryResults(self._db, query)
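
    # A hedged usage sketch (not part of this module): find the visit/detector
    # data IDs for which a dataset exists. The dimension names, dataset type,
    # and collection are assumptions for illustration.
    #
    #     dataIds = registry.queryDataIds(["visit", "detector"],
    #                                     datasets="calexp",
    #                                     collections="HSC/runs/example",
    #                                     instrument="HSC")
    #     for dataId in dataIds:
    #         print(dataId["visit"], dataId["detector"])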


    def queryDimensionRecords(self, element: Union[DimensionElement, str], *,
                              dataId: Optional[DataId] = None,
                              datasets: Any = None,
                              collections: Any = None,
                              where: Optional[str] = None,
                              components: Optional[bool] = None,
                              bind: Optional[Mapping[str, Any]] = None,
                              check: bool = True,
                              **kwargs: Any) -> Iterator[DimensionRecord]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise KeyError(f"No such dimension '{element}', available dimensions: "
                               + str(self.dimensions.getStaticElements())) from e
        dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections,
                                    where=where, components=components, bind=bind, check=check, **kwargs)
        return iter(self._managers.dimensions[element].fetch(dataIds))


    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise TypeError("No collections provided to queryDatasetAssociations, "
                                "and no defaults from registry construction.")
            collections = self.defaults.collections
        else:
            collections = CollectionQuery.fromExpression(collections)
        TimespanReprClass = self._db.getTimespanRepresentation()
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        for collectionRecord in collections.iter(self._managers.collections,
                                                 collectionTypes=frozenset(collectionTypes),
                                                 flattenChains=flattenChains):
            query = storage.select(collectionRecord)
            if query is None:
                continue
            for row in self._db.query(query.combine()):
                dataId = DataCoordinate.fromRequiredValues(
                    storage.datasetType.dimensions,
                    tuple(row[name] for name in storage.datasetType.dimensions.required.names)
                )
                runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]]
                ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name,
                                 conform=False)
                if collectionRecord.type is CollectionType.CALIBRATION:
                    timespan = TimespanReprClass.extract(row)
                else:
                    timespan = None
                yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan)


    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """