Coverage for python/lsst/daf/butler/registries/sql.py: 13%

477 statements  

coverage.py v6.4.4, created at 2022-09-30 02:19 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28from collections import defaultdict 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Tuple, 

41 Union, 

42) 

43 

44import sqlalchemy 

45from lsst.resources import ResourcePathExpression 

46from lsst.utils.iteration import ensure_iterable 

47 

48from ..core import ( 

49 Config, 

50 DataCoordinate, 

51 DataCoordinateIterable, 

52 DataId, 

53 DatasetAssociation, 

54 DatasetId, 

55 DatasetRef, 

56 DatasetType, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionRecord, 

62 DimensionUniverse, 

63 NamedKeyMapping, 

64 NameLookupMapping, 

65 Progress, 

66 StorageClassFactory, 

67 Timespan, 

68 ddl, 

69) 

70from ..core.utils import transactional 

71from ..registry import ( 

72 ArgumentError, 

73 CollectionExpressionError, 

74 CollectionSearch, 

75 CollectionSummary, 

76 CollectionType, 

77 CollectionTypeError, 

78 ConflictingDefinitionError, 

79 DataIdValueError, 

80 DatasetTypeError, 

81 DatasetTypeExpressionError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis 

94 

95if TYPE_CHECKING:  95 ↛ 96 (line 95 didn't jump to line 96, because the condition on line 95 was never true)

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance to store Registry. 

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified. 

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes database contents; the database must be 

132 empty prior to calling this method. 

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will 

138 be loaded from registry.yaml. 

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimensions configuration; if missing, the default configuration 

141 will be loaded from dimensions.yaml. 

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig) 

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 
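A minimal usage sketch for `createFromConfig` (not part of sql.py): the SQLite connection string and repository root are hypothetical assumptions; only the method itself and `RegistryConfig` come from the code above.

from lsst.daf.butler.registry import RegistryConfig
from lsst.daf.butler.registries.sql import SqlRegistry

# Hypothetical configuration: a brand-new, empty SQLite database.
config = RegistryConfig()
config["db"] = "sqlite:////tmp/repo/gen3.sqlite3"  # assumed path, not from sql.py

# Initializes the schema and default dimension configuration, then returns
# a writeable SqlRegistry.
registry = SqlRegistry.createFromConfig(config, butlerRoot="/tmp/repo")
assert registry.isWriteable()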

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration. 

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 str(config.connectionString), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 managers = managerTypes.loadRepo(database) 

208 if defaults is None: 

209 defaults = RegistryDefaults() 

210 return cls(database, defaults, managers) 

211 
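`fromConfig`, by contrast, expects the database to exist already. A sketch (same hypothetical repository as above) opening it read-only with a default collection search path; the collection name is a placeholder.

from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
from lsst.daf.butler.registries.sql import SqlRegistry

config = RegistryConfig()
config["db"] = "sqlite:////tmp/repo/gen3.sqlite3"  # assumed existing database

registry = SqlRegistry.fromConfig(
    config,
    butlerRoot="/tmp/repo",
    writeable=False,  # read-only connection
    defaults=RegistryDefaults(collections=["MyCam/defaults"]),  # placeholder collection
)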

212 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

213 self._db = database 

214 self._managers = managers 

215 self.storageClasses = StorageClassFactory() 

216 # Intentionally invoke property setter to initialize defaults. This 

217 # can only be done after most of the rest of Registry has already been 

218 # initialized, and must be done before the property getter is used. 

219 self.defaults = defaults 

220 # In the future DatasetIdFactory may become configurable and this 

221 # instance will need to be shared with datasets manager. 

222 self.datasetIdFactory = DatasetIdFactory() 

223 

224 def __str__(self) -> str: 

225 return str(self._db) 

226 

227 def __repr__(self) -> str: 

228 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

229 

230 def isWriteable(self) -> bool: 

231 # Docstring inherited from lsst.daf.butler.registry.Registry 

232 return self._db.isWriteable() 

233 

234 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

235 # Docstring inherited from lsst.daf.butler.registry.Registry 

236 if defaults is None: 

237 # No need to copy, because `RegistryDefaults` is immutable; we 

238 # effectively copy on write. 

239 defaults = self.defaults 

240 return type(self)(self._db, defaults, self._managers) 

241 

242 @property 

243 def dimensions(self) -> DimensionUniverse: 

244 # Docstring inherited from lsst.daf.butler.registry.Registry 

245 return self._managers.dimensions.universe 

246 

247 def refresh(self) -> None: 

248 # Docstring inherited from lsst.daf.butler.registry.Registry 

249 self._managers.refresh() 

250 

251 @contextlib.contextmanager 

252 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

253 # Docstring inherited from lsst.daf.butler.registry.Registry 

254 try: 

255 with self._db.transaction(savepoint=savepoint): 

256 yield 

257 except BaseException: 

258 # TODO: this clears the caches sometimes when we wouldn't actually 

259 # need to. Can we avoid that? 

260 self._managers.dimensions.clearCaches() 

261 raise 

262 
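A sketch of using the `transaction` context manager defined above: the two writes below either both commit or both roll back. The run name and instrument record are placeholders, and `registry` is assumed to be an existing SqlRegistry handle.

# Group registry writes so they succeed or fail together.
with registry.transaction(savepoint=True):
    registry.registerRun("u/someone/scratch")  # placeholder run name
    # Placeholder record; a real instrument record carries more fields.
    registry.insertDimensionData("instrument", {"name": "MyCam"})
# Reaching this point means both operations were committed.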

263 def resetConnectionPool(self) -> None: 

264 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

265 

266 This operation is useful when using the registry with fork-based 

267 multiprocessing. To use the registry across a fork boundary, make 

268 sure that there are no currently active connections (no session or 

269 transaction is in progress) and that the connection pool is reset 

270 using this method. It should be called by the child process 

271 immediately after the fork. 

272 """ 

273 self._db._engine.dispose() 

274 
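A sketch of the fork-based pattern the docstring describes: the child process discards inherited connections before touching the database. The `os.fork` scaffolding is illustrative, and `registry` is an existing handle with no open transaction.

import os

pid = os.fork()
if pid == 0:
    # Child: reset the inherited SQLAlchemy pool before any new queries.
    registry.resetConnectionPool()
    print(list(registry.queryCollections()))
    os._exit(0)
else:
    os.waitpid(pid, 0)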

275 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

276 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

277 other data repository client. 

278 

279 Opaque table records can be added via `insertOpaqueData`, retrieved via 

280 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

281 

282 Parameters 

283 ---------- 

284 tableName : `str` 

285 Logical name of the opaque table. This may differ from the 

286 actual name used in the database by a prefix and/or suffix. 

287 spec : `ddl.TableSpec` 

288 Specification for the table to be added. 

289 """ 

290 self._managers.opaque.register(tableName, spec) 

291 

292 @transactional 

293 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

294 """Insert records into an opaque table. 

295 

296 Parameters 

297 ---------- 

298 tableName : `str` 

299 Logical name of the opaque table. Must match the name used in a 

300 previous call to `registerOpaqueTable`. 

301 data 

302 Each additional positional argument is a dictionary that represents 

303 a single row to be added. 

304 """ 

305 self._managers.opaque[tableName].insert(*data) 

306 

307 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

308 """Retrieve records from an opaque table. 

309 

310 Parameters 

311 ---------- 

312 tableName : `str` 

313 Logical name of the opaque table. Must match the name used in a 

314 previous call to `registerOpaqueTable`. 

315 where 

316 Additional keyword arguments are interpreted as equality 

317 constraints that restrict the returned rows (combined with AND); 

318 keyword arguments are column names and values are the values they 

319 must have. 

320 

321 Yields 

322 ------ 

323 row : `dict` 

324 A dictionary representing a single result row. 

325 """ 

326 yield from self._managers.opaque[tableName].fetch(**where) 

327 

328 @transactional 

329 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

330 """Remove records from an opaque table. 

331 

332 Parameters 

333 ---------- 

334 tableName : `str` 

335 Logical name of the opaque table. Must match the name used in a 

336 previous call to `registerOpaqueTable`. 

337 where 

338 Additional keyword arguments are interpreted as equality 

339 constraints that restrict the deleted rows (combined with AND); 

340 keyword arguments are column names and values are the values they 

341 must have. 

342 """ 

343 self._managers.opaque[tableName].delete(where.keys(), where) 

344 
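A sketch tying the four opaque-table methods together; the table name and column specification are invented for illustration (a real `Datastore` supplies its own `ddl.TableSpec`), and `registry` is an existing handle.

import sqlalchemy
from lsst.daf.butler.core import ddl

# Hypothetical two-column opaque table owned by a datastore-like client.
spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("my_datastore_records", spec)
registry.insertOpaqueData("my_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))
registry.deleteOpaqueData("my_datastore_records", dataset_id=1)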

345 def registerCollection( 

346 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

347 ) -> bool: 

348 # Docstring inherited from lsst.daf.butler.registry.Registry 

349 _, registered = self._managers.collections.register(name, type, doc=doc) 

350 return registered 

351 

352 def getCollectionType(self, name: str) -> CollectionType: 

353 # Docstring inherited from lsst.daf.butler.registry.Registry 

354 return self._managers.collections.find(name).type 

355 

356 def _get_collection_record(self, name: str) -> CollectionRecord: 

357 # Docstring inherited from lsst.daf.butler.registry.Registry 

358 return self._managers.collections.find(name) 

359 

360 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

361 # Docstring inherited from lsst.daf.butler.registry.Registry 

362 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

363 return registered 

364 

365 @transactional 

366 def removeCollection(self, name: str) -> None: 

367 # Docstring inherited from lsst.daf.butler.registry.Registry 

368 self._managers.collections.remove(name) 

369 

370 def getCollectionChain(self, parent: str) -> CollectionSearch: 

371 # Docstring inherited from lsst.daf.butler.registry.Registry 

372 record = self._managers.collections.find(parent) 

373 if record.type is not CollectionType.CHAINED: 

374 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

375 assert isinstance(record, ChainedCollectionRecord) 

376 return record.children 

377 

378 @transactional 

379 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

380 # Docstring inherited from lsst.daf.butler.registry.Registry 

381 record = self._managers.collections.find(parent) 

382 if record.type is not CollectionType.CHAINED: 

383 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

384 assert isinstance(record, ChainedCollectionRecord) 

385 children = CollectionSearch.fromExpression(children) 

386 if children != record.children or flatten: 

387 record.update(self._managers.collections, children, flatten=flatten) 

388 

389 def getCollectionParentChains(self, collection: str) -> Set[str]: 

390 # Docstring inherited from lsst.daf.butler.registry.Registry 

391 return { 

392 record.name 

393 for record in self._managers.collections.getParentChains( 

394 self._managers.collections.find(collection).key 

395 ) 

396 } 

397 

398 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

399 # Docstring inherited from lsst.daf.butler.registry.Registry 

400 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

401 

402 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

403 # Docstring inherited from lsst.daf.butler.registry.Registry 

404 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

405 

406 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

407 # Docstring inherited from lsst.daf.butler.registry.Registry 

408 record = self._managers.collections.find(collection) 

409 return self._managers.datasets.getCollectionSummary(record) 

410 

411 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

412 # Docstring inherited from lsst.daf.butler.registry.Registry 

413 _, inserted = self._managers.datasets.register(datasetType) 

414 return inserted 

415 

416 def removeDatasetType(self, name: str) -> None: 

417 # Docstring inherited from lsst.daf.butler.registry.Registry 

418 self._managers.datasets.remove(name) 

419 

420 def getDatasetType(self, name: str) -> DatasetType: 

421 # Docstring inherited from lsst.daf.butler.registry.Registry 

422 return self._managers.datasets[name].datasetType 

423 

424 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

425 # Docstring inherited from lsst.daf.butler.registry.Registry 

426 return self._managers.datasets.supportsIdGenerationMode(mode) 

427 

428 def findDataset( 

429 self, 

430 datasetType: Union[DatasetType, str], 

431 dataId: Optional[DataId] = None, 

432 *, 

433 collections: Any = None, 

434 timespan: Optional[Timespan] = None, 

435 **kwargs: Any, 

436 ) -> Optional[DatasetRef]: 

437 # Docstring inherited from lsst.daf.butler.registry.Registry 

438 if isinstance(datasetType, DatasetType): 

439 storage = self._managers.datasets[datasetType.name] 

440 else: 

441 storage = self._managers.datasets[datasetType] 

442 dataId = DataCoordinate.standardize( 

443 dataId, 

444 graph=storage.datasetType.dimensions, 

445 universe=self.dimensions, 

446 defaults=self.defaults.dataId, 

447 **kwargs, 

448 ) 

449 if collections is None: 

450 if not self.defaults.collections: 

451 raise NoDefaultCollectionError( 

452 "No collections provided to findDataset, and no defaults from registry construction." 

453 ) 

454 collections = self.defaults.collections 

455 else: 

456 collections = CollectionSearch.fromExpression(collections) 

457 for collectionRecord in collections.iter(self._managers.collections): 

458 if collectionRecord.type is CollectionType.CALIBRATION and ( 

459 not storage.datasetType.isCalibration() or timespan is None 

460 ): 

461 continue 

462 result = storage.find(collectionRecord, dataId, timespan=timespan) 

463 if result is not None: 

464 return result 

465 

466 return None 

467 
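A sketch of a `findDataset` lookup using keyword data-ID values; the dataset type, dimension values, and collection name are placeholders.

# Look up a single raw exposure in an ordered collection search path.
ref = registry.findDataset(
    "raw",                                             # placeholder dataset type
    instrument="MyCam", exposure=903334, detector=12,  # placeholder data ID
    collections=["MyCam/raw/all"],                     # placeholder collection
)
if ref is None:
    print("No matching dataset in the searched collections.")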

468 @transactional 

469 def insertDatasets( 

470 self, 

471 datasetType: Union[DatasetType, str], 

472 dataIds: Iterable[DataId], 

473 run: Optional[str] = None, 

474 expand: bool = True, 

475 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

476 ) -> List[DatasetRef]: 

477 # Docstring inherited from lsst.daf.butler.registry.Registry 

478 if isinstance(datasetType, DatasetType): 

479 storage = self._managers.datasets.find(datasetType.name) 

480 if storage is None: 

481 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

482 else: 

483 storage = self._managers.datasets.find(datasetType) 

484 if storage is None: 

485 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

486 if run is None: 

487 if self.defaults.run is None: 

488 raise NoDefaultCollectionError( 

489 "No run provided to insertDatasets, and no default from registry construction." 

490 ) 

491 run = self.defaults.run 

492 runRecord = self._managers.collections.find(run) 

493 if runRecord.type is not CollectionType.RUN: 

494 raise CollectionTypeError( 

495 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

496 ) 

497 assert isinstance(runRecord, RunRecord) 

498 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

499 if expand: 

500 expandedDataIds = [ 

501 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

502 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

503 ] 

504 else: 

505 expandedDataIds = [ 

506 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

507 ] 

508 try: 

509 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

510 if self._managers.obscore: 

511 self._managers.obscore.add_datasets(refs) 

512 except sqlalchemy.exc.IntegrityError as err: 

513 raise ConflictingDefinitionError( 

514 f"A database constraint failure was triggered by inserting " 

515 f"one or more datasets of type {storage.datasetType} into " 

516 f"collection '{run}'. " 

517 f"This probably means a dataset with the same data ID " 

518 f"and dataset type already exists, but it may also mean a " 

519 f"dimension row is missing." 

520 ) from err 

521 return refs 

522 

523 @transactional 

524 def _importDatasets( 

525 self, 

526 datasets: Iterable[DatasetRef], 

527 expand: bool = True, 

528 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

529 reuseIds: bool = False, 

530 ) -> List[DatasetRef]: 

531 # Docstring inherited from lsst.daf.butler.registry.Registry 

532 datasets = list(datasets) 

533 if not datasets: 

534 # nothing to do 

535 return [] 

536 

537 # find dataset type 

538 datasetTypes = set(dataset.datasetType for dataset in datasets) 

539 if len(datasetTypes) != 1: 

540 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

541 datasetType = datasetTypes.pop() 

542 

543 # get storage handler for this dataset type 

544 storage = self._managers.datasets.find(datasetType.name) 

545 if storage is None: 

546 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

547 

548 # find run name 

549 runs = set(dataset.run for dataset in datasets) 

550 if len(runs) != 1: 

551 raise ValueError(f"Multiple run names in input datasets: {runs}") 

552 run = runs.pop() 

553 if run is None: 

554 if self.defaults.run is None: 

555 raise NoDefaultCollectionError( 

556 "No run provided to ingestDatasets, and no default from registry construction." 

557 ) 

558 run = self.defaults.run 

559 

560 runRecord = self._managers.collections.find(run) 

561 if runRecord.type is not CollectionType.RUN: 

562 raise CollectionTypeError( 

563 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

564 " RUN collection required." 

565 ) 

566 assert isinstance(runRecord, RunRecord) 

567 

568 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

569 if expand: 

570 expandedDatasets = [ 

571 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

572 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

573 ] 

574 else: 

575 expandedDatasets = [ 

576 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

577 for dataset in datasets 

578 ] 

579 

580 try: 

581 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

582 if self._managers.obscore: 

583 self._managers.obscore.add_datasets(refs) 

584 except sqlalchemy.exc.IntegrityError as err: 

585 raise ConflictingDefinitionError( 

586 f"A database constraint failure was triggered by inserting " 

587 f"one or more datasets of type {storage.datasetType} into " 

588 f"collection '{run}'. " 

589 f"This probably means a dataset with the same data ID " 

590 f"and dataset type already exists, but it may also mean a " 

591 f"dimension row is missing." 

592 ) from err 

593 return refs 

594 

595 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

596 # Docstring inherited from lsst.daf.butler.registry.Registry 

597 return self._managers.datasets.getDatasetRef(id) 

598 

599 @transactional 

600 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

601 # Docstring inherited from lsst.daf.butler.registry.Registry 

602 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

603 for datasetType, refsForType in progress.iter_item_chunks( 

604 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

605 ): 

606 storage = self._managers.datasets[datasetType.name] 

607 try: 

608 storage.delete(refsForType) 

609 except sqlalchemy.exc.IntegrityError as err: 

610 raise OrphanedRecordError( 

611 "One or more datasets is still present in one or more Datastores." 

612 ) from err 

613 

614 @transactional 

615 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

616 # Docstring inherited from lsst.daf.butler.registry.Registry 

617 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

618 collectionRecord = self._managers.collections.find(collection) 

619 if collectionRecord.type is not CollectionType.TAGGED: 

620 raise CollectionTypeError( 

621 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

622 ) 

623 for datasetType, refsForType in progress.iter_item_chunks( 

624 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

625 ): 

626 storage = self._managers.datasets[datasetType.name] 

627 try: 

628 storage.associate(collectionRecord, refsForType) 

629 if self._managers.obscore: 

630 # If a TAGGED collection is being monitored by ObsCore 

631 # manager then we may need to save the dataset. 

632 self._managers.obscore.associate(refsForType, collectionRecord) 

633 except sqlalchemy.exc.IntegrityError as err: 

634 raise ConflictingDefinitionError( 

635 f"Constraint violation while associating dataset of type {datasetType.name} with " 

636 f"collection {collection}. This probably means that one or more datasets with the same " 

637 f"dataset type and data ID already exist in the collection, but it may also indicate " 

638 f"that the datasets do not exist." 

639 ) from err 

640 

641 @transactional 

642 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

643 # Docstring inherited from lsst.daf.butler.registry.Registry 

644 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

645 collectionRecord = self._managers.collections.find(collection) 

646 if collectionRecord.type is not CollectionType.TAGGED: 

647 raise CollectionTypeError( 

648 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

649 ) 

650 for datasetType, refsForType in progress.iter_item_chunks( 

651 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

652 ): 

653 storage = self._managers.datasets[datasetType.name] 

654 storage.disassociate(collectionRecord, refsForType) 

655 if self._managers.obscore: 

656 self._managers.obscore.disassociate(refsForType, collectionRecord) 

657 

658 @transactional 

659 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

660 # Docstring inherited from lsst.daf.butler.registry.Registry 

661 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

662 collectionRecord = self._managers.collections.find(collection) 

663 for datasetType, refsForType in progress.iter_item_chunks( 

664 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

665 ): 

666 storage = self._managers.datasets[datasetType.name] 

667 storage.certify(collectionRecord, refsForType, timespan) 

668 
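A sketch of certifying already-ingested calibration datasets into a CALIBRATION collection for a validity range; the collection name, the `bias_refs` iterable, and the dates are placeholders.

import astropy.time
from lsst.daf.butler import CollectionType, Timespan

validity = Timespan(
    begin=astropy.time.Time("2022-01-01", scale="tai"),
    end=astropy.time.Time("2022-06-01", scale="tai"),
)
registry.registerCollection("MyCam/calib", CollectionType.CALIBRATION)
# bias_refs: an iterable of resolved DatasetRef for a calibration dataset type.
registry.certify("MyCam/calib", bias_refs, validity)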

669 @transactional 

670 def decertify( 

671 self, 

672 collection: str, 

673 datasetType: Union[str, DatasetType], 

674 timespan: Timespan, 

675 *, 

676 dataIds: Optional[Iterable[DataId]] = None, 

677 ) -> None: 

678 # Docstring inherited from lsst.daf.butler.registry.Registry 

679 collectionRecord = self._managers.collections.find(collection) 

680 if isinstance(datasetType, str): 

681 storage = self._managers.datasets[datasetType] 

682 else: 

683 storage = self._managers.datasets[datasetType.name] 

684 standardizedDataIds = None 

685 if dataIds is not None: 

686 standardizedDataIds = [ 

687 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

688 ] 

689 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

690 

691 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

692 """Return an object that allows a new `Datastore` instance to 

693 communicate with this `Registry`. 

694 

695 Returns 

696 ------- 

697 manager : `DatastoreRegistryBridgeManager` 

698 Object that mediates communication between this `Registry` and its 

699 associated datastores. 

700 """ 

701 return self._managers.datastores 

702 

703 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

704 # Docstring inherited from lsst.daf.butler.registry.Registry 

705 return self._managers.datastores.findDatastores(ref) 

706 

707 def expandDataId( 

708 self, 

709 dataId: Optional[DataId] = None, 

710 *, 

711 graph: Optional[DimensionGraph] = None, 

712 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

713 withDefaults: bool = True, 

714 **kwargs: Any, 

715 ) -> DataCoordinate: 

716 # Docstring inherited from lsst.daf.butler.registry.Registry 

717 if not withDefaults: 

718 defaults = None 

719 else: 

720 defaults = self.defaults.dataId 

721 try: 

722 standardized = DataCoordinate.standardize( 

723 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

724 ) 

725 except KeyError as exc: 

726 # This means either kwargs have some odd name or a required 

727 # dimension is missing. 

728 raise DimensionNameError(str(exc)) from exc 

729 if standardized.hasRecords(): 

730 return standardized 

731 if records is None: 

732 records = {} 

733 elif isinstance(records, NamedKeyMapping): 

734 records = records.byName() 

735 else: 

736 records = dict(records) 

737 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

738 records.update(dataId.records.byName()) 

739 keys = standardized.byName() 

740 for element in standardized.graph.primaryKeyTraversalOrder: 

741 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

742 if record is ...: 

743 if isinstance(element, Dimension) and keys.get(element.name) is None: 

744 if element in standardized.graph.required: 

745 raise DimensionNameError( 

746 f"No value or null value for required dimension {element.name}." 

747 ) 

748 keys[element.name] = None 

749 record = None 

750 else: 

751 storage = self._managers.dimensions[element] 

752 dataIdSet = DataCoordinateIterable.fromScalar( 

753 DataCoordinate.standardize(keys, graph=element.graph) 

754 ) 

755 fetched = tuple(storage.fetch(dataIdSet)) 

756 try: 

757 (record,) = fetched 

758 except ValueError: 

759 record = None 

760 records[element.name] = record 

761 if record is not None: 

762 for d in element.implied: 

763 value = getattr(record, d.name) 

764 if keys.setdefault(d.name, value) != value: 

765 raise InconsistentDataIdError( 

766 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

767 f"but {element.name} implies {d.name}={value!r}." 

768 ) 

769 else: 

770 if element in standardized.graph.required: 

771 raise DataIdValueError( 

772 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

773 ) 

774 if element.alwaysJoin: 

775 raise InconsistentDataIdError( 

776 f"Could not fetch record for element {element.name} via keys {keys}, ", 

777 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

778 "related.", 

779 ) 

780 for d in element.implied: 

781 keys.setdefault(d.name, None) 

782 records.setdefault(d.name, None) 

783 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

784 

785 def insertDimensionData( 

786 self, 

787 element: Union[DimensionElement, str], 

788 *data: Union[Mapping[str, Any], DimensionRecord], 

789 conform: bool = True, 

790 replace: bool = False, 

791 skip_existing: bool = False, 

792 ) -> None: 

793 # Docstring inherited from lsst.daf.butler.registry.Registry 

794 if conform: 

795 if isinstance(element, str): 

796 element = self.dimensions[element] 

797 records = [ 

798 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

799 ] 

800 else: 

801 # Ignore typing since caller said to trust them with conform=False. 

802 records = data # type: ignore 

803 storage = self._managers.dimensions[element] # type: ignore 

804 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

805 

806 def syncDimensionData( 

807 self, 

808 element: Union[DimensionElement, str], 

809 row: Union[Mapping[str, Any], DimensionRecord], 

810 conform: bool = True, 

811 update: bool = False, 

812 ) -> Union[bool, Dict[str, Any]]: 

813 # Docstring inherited from lsst.daf.butler.registry.Registry 

814 if conform: 

815 if isinstance(element, str): 

816 element = self.dimensions[element] 

817 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

818 else: 

819 # Ignore typing since caller said to trust them with conform=False. 

820 record = row # type: ignore 

821 storage = self._managers.dimensions[element] # type: ignore 

822 return storage.sync(record, update=update) 

823 
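A sketch contrasting the two dimension-record methods above; the instrument and detector values are placeholders and carry only a subset of the real record fields.

# Idempotently ensure the instrument record exists (returns True if newly inserted).
inserted = registry.syncDimensionData("instrument", {"name": "MyCam", "detector_max": 2})

# Insert detector records in bulk; conflicts raise unless skip_existing=True.
registry.insertDimensionData(
    "detector",
    {"instrument": "MyCam", "id": 1, "full_name": "S00"},
    {"instrument": "MyCam", "id": 2, "full_name": "S01"},
)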

824 def queryDatasetTypes( 

825 self, 

826 expression: Any = ..., 

827 *, 

828 components: Optional[bool] = None, 

829 missing: Optional[List[str]] = None, 

830 ) -> Iterator[DatasetType]: 

831 # Docstring inherited from lsst.daf.butler.registry.Registry 

832 try: 

833 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

834 except TypeError as exc: 

835 raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc 

836 unknownComponentsMessage = ( 

837 "Could not find definition for storage class %s for dataset type %r;" 

838 " if it has components they will not be included in dataset type query results." 

839 ) 

840 if wildcard is Ellipsis: 

841 for datasetType in self._managers.datasets: 

842 # The dataset type can no longer be a component 

843 yield datasetType 

844 if components: 

845 # Automatically create the component dataset types 

846 try: 

847 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

848 except KeyError as err: 

849 _LOG.warning(unknownComponentsMessage, err, datasetType.name) 

850 else: 

851 yield from componentsForDatasetType 

852 return 

853 done: Set[str] = set() 

854 for name in wildcard.strings: 

855 storage = self._managers.datasets.find(name) 

856 done.add(name) 

857 if storage is None: 

858 if missing is not None: 

859 missing.append(name) 

860 else: 

861 yield storage.datasetType 

862 if wildcard.patterns: 

863 # If components (the argument) is None, we'll save component 

864 # datasets that we might want to match, but only if their parents 

865 # didn't get included. 

866 componentsForLater = [] 

867 for registeredDatasetType in self._managers.datasets: 

868 # Components are not stored in registry so expand them here 

869 allDatasetTypes = [registeredDatasetType] 

870 if components is not False: 

871 # Only check for the components if we are being asked 

872 # for components or components is None. 

873 try: 

874 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

875 except KeyError as err: 

876 _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name) 

877 for datasetType in allDatasetTypes: 

878 if datasetType.name in done: 

879 continue 

880 parentName, componentName = datasetType.nameAndComponent() 

881 if componentName is not None and not components: 

882 if components is None and parentName not in done: 

883 componentsForLater.append(datasetType) 

884 continue 

885 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

886 done.add(datasetType.name) 

887 yield datasetType 

888 # Go back and try to match saved components. 

889 for datasetType in componentsForLater: 

890 parentName, _ = datasetType.nameAndComponent() 

891 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

892 yield datasetType 

893 

894 def queryCollections( 

895 self, 

896 expression: Any = ..., 

897 datasetType: Optional[DatasetType] = None, 

898 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

899 flattenChains: bool = False, 

900 includeChains: Optional[bool] = None, 

901 ) -> Iterator[str]: 

902 # Docstring inherited from lsst.daf.butler.registry.Registry 

903 

904 # Right now the datasetTypes argument is completely ignored, but that 

905 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

906 # ticket will take care of that. 

907 try: 

908 query = CollectionQuery.fromExpression(expression) 

909 except TypeError as exc: 

910 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

911 collectionTypes = ensure_iterable(collectionTypes) 

912 for record in query.iter( 

913 self._managers.collections, 

914 collectionTypes=frozenset(collectionTypes), 

915 flattenChains=flattenChains, 

916 includeChains=includeChains, 

917 ): 

918 yield record.name 

919 

920 def _makeQueryBuilder( 

921 self, summary: queries.QuerySummary, doomed_by: Iterable[str] = () 

922 ) -> queries.QueryBuilder: 

923 """Return a `QueryBuilder` instance capable of constructing and 

924 managing more complex queries than those obtainable via `Registry` 

925 interfaces. 

926 

927 This is an advanced interface; downstream code should prefer 

928 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

929 are sufficient. 

930 

931 Parameters 

932 ---------- 

933 summary : `queries.QuerySummary` 

934 Object describing and categorizing the full set of dimensions that 

935 will be included in the query. 

936 doomed_by : `Iterable` of `str`, optional 

937 A list of diagnostic messages that indicate why the query is going 

938 to yield no results and should not even be executed. If an empty 

939 container (default) the query will be executed unless other code 

940 determines that it is doomed. 

941 

942 Returns 

943 ------- 

944 builder : `queries.QueryBuilder` 

945 Object that can be used to construct and perform advanced queries. 

946 """ 

947 return queries.QueryBuilder( 

948 summary, 

949 backend=queries.SqlQueryBackend(self._db, self._managers), 

950 doomed_by=doomed_by, 

951 ) 

952 

953 def _standardize_query_dataset_args( 

954 self, 

955 datasets: Any, 

956 collections: Any, 

957 components: bool | None, 

958 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

959 *, 

960 doomed_by: list[str], 

961 ) -> tuple[defaultdict[DatasetType, list[str | None]], CollectionQuery | CollectionSearch | None]: 

962 """Preprocess dataset arguments passed to query* methods. 

963 

964 Parameters 

965 ---------- 

966 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

967 Expression identifying dataset types. See `queryDatasetTypes` for 

968 details. 

969 collections : `str`, `re.Pattern`, or iterable of these 

970 Expression identifying collections to be searched. See 

971 `queryCollections` for details. 

972 components : `bool`, optional 

973 If `True`, apply all expression patterns to component dataset type 

974 names as well. If `False`, never apply patterns to components. 

975 If `None` (default), apply patterns to components only if their 

976 parent datasets were not matched by the expression. 

977 Fully-specified component datasets (`str` or `DatasetType` 

978 instances) are always included. 

979 mode : `str`, optional 

980 The way in which datasets are being used in this query; one of: 

981 

982 - "find_first": this is a query for the first dataset in an 

983 ordered list of collections. Prohibits collection wildcards, 

984 but permits dataset type wildcards. 

985 

986 - "find_all": this is a query for all datasets in all matched 

987 collections. Permits collection and dataset type wildcards. 

988 

989 - "constrain": this is a query for something other than datasets, 

990 with results constrained by dataset existence. Permits 

991 collection wildcards and prohibits ``...`` as a dataset type 

992 wildcard. 

993 doomed_by : `list` [ `str` ] 

994 List to append messages indicating why the query is doomed to 

995 yield no results. 

996 

997 Returns 

998 ------- 

999 composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ] 

1000 Dictionary mapping parent dataset type to `list` of components 

1001 matched for that dataset type (or `None` for the parent itself). 

1002 collections : `CollectionSearch` or `CollectionQuery` 

1003 Processed collection expression. 

1004 """ 

1005 composition: defaultdict[DatasetType, list[str | None]] = defaultdict(list) 

1006 if datasets is not None: 

1007 if not collections: 

1008 if not self.defaults.collections: 

1009 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1010 collections = self.defaults.collections 

1011 elif mode == "find_first": 

1012 collections = CollectionSearch.fromExpression(collections) 

1013 else: 

1014 collections = CollectionQuery.fromExpression(collections) 

1015 missing: list[str] = [] 

1016 if mode == "constrain" and datasets is Ellipsis: 

1017 raise TypeError("Cannot pass the universal wildcard '...' for dataset types in this context.") 

1018 for dataset_type in self.queryDatasetTypes(datasets, components=components, missing=missing): 

1019 if dataset_type.isComponent(): 

1020 composition[dataset_type.makeCompositeDatasetType()].append(dataset_type.component()) 

1021 else: 

1022 composition[dataset_type].append(None) 

1023 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1024 elif collections: 

1025 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1026 return composition, collections 

1027 

1028 def queryDatasets( 

1029 self, 

1030 datasetType: Any, 

1031 *, 

1032 collections: Any = None, 

1033 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1034 dataId: Optional[DataId] = None, 

1035 where: Optional[str] = None, 

1036 findFirst: bool = False, 

1037 components: Optional[bool] = None, 

1038 bind: Optional[Mapping[str, Any]] = None, 

1039 check: bool = True, 

1040 **kwargs: Any, 

1041 ) -> queries.DatasetQueryResults: 

1042 # Docstring inherited from lsst.daf.butler.registry.Registry 

1043 doomed_by: list[str] = [] 

1044 data_id = self.expandDataId(dataId, **kwargs) 

1045 dataset_composition, collections = self._standardize_query_dataset_args( 

1046 datasetType, 

1047 collections, 

1048 components, 

1049 mode="find_first" if findFirst else "find_all", 

1050 doomed_by=doomed_by, 

1051 ) 

1052 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1053 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1054 # The full set of dimensions in the query is the combination of 

1055 # those needed for the DatasetType and those explicitly requested, 

1056 # if any. 

1057 dimension_names = set(parent_dataset_type.dimensions.names) 

1058 if dimensions is not None: 

1059 dimension_names.update(self.dimensions.extract(dimensions).names) 

1060 # Construct the summary structure needed to construct a 

1061 # QueryBuilder. 

1062 summary = queries.QuerySummary( 

1063 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1064 dataId=data_id, 

1065 expression=where, 

1066 bind=bind, 

1067 defaults=self.defaults.dataId, 

1068 check=check, 

1069 datasets=[parent_dataset_type], 

1070 ) 

1071 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1072 # Add the dataset subquery to the query, telling the QueryBuilder 

1073 # to include the rank of the selected collection in the results 

1074 # only if we need to findFirst. Note that if any of the 

1075 # collections are actually wildcard expressions, and 

1076 # findFirst=True, this will raise TypeError for us. 

1077 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1078 query = builder.finish() 

1079 parent_results.append( 

1080 queries.ParentDatasetQueryResults( 

1081 self._db, query, datasetType=parent_dataset_type, components=components_for_parent 

1082 ) 

1083 ) 

1084 if not parent_results: 

1085 doomed_by.extend( 

1086 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1087 "exist in any collection." 

1088 for t in ensure_iterable(datasetType) 

1089 ) 

1090 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1091 elif len(parent_results) == 1: 

1092 return parent_results[0] 

1093 else: 

1094 return queries.ChainedDatasetQueryResults(parent_results) 

1095 
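A sketch of a typical `queryDatasets` call; the dataset type, collection, and `where` expression are placeholders (`band` and `visit` are dimension names in the default universe).

refs = registry.queryDatasets(
    "calexp",                               # placeholder dataset type
    collections=["MyCam/runs/nightly"],     # placeholder collection
    where="band = 'r' AND visit > 900000",  # placeholder user expression
    findFirst=True,
)
for ref in set(refs):
    print(ref.dataId)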

1096 def queryDataIds( 

1097 self, 

1098 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1099 *, 

1100 dataId: Optional[DataId] = None, 

1101 datasets: Any = None, 

1102 collections: Any = None, 

1103 where: Optional[str] = None, 

1104 components: Optional[bool] = None, 

1105 bind: Optional[Mapping[str, Any]] = None, 

1106 check: bool = True, 

1107 **kwargs: Any, 

1108 ) -> queries.DataCoordinateQueryResults: 

1109 # Docstring inherited from lsst.daf.butler.registry.Registry 

1110 dimensions = ensure_iterable(dimensions) 

1111 requestedDimensions = self.dimensions.extract(dimensions) 

1112 doomed_by: list[str] = [] 

1113 data_id = self.expandDataId(dataId, **kwargs) 

1114 dataset_composition, collections = self._standardize_query_dataset_args( 

1115 datasets, collections, components, doomed_by=doomed_by 

1116 ) 

1117 

1118 def query_factory( 

1119 order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None 

1120 ) -> queries.Query: 

1121 """Construct the Query object that generates query results.""" 

1122 summary = queries.QuerySummary( 

1123 requested=requestedDimensions, 

1124 dataId=data_id, 

1125 expression=where, 

1126 bind=bind, 

1127 defaults=self.defaults.dataId, 

1128 check=check, 

1129 datasets=dataset_composition.keys(), 

1130 order_by=order_by, 

1131 limit=limit, 

1132 ) 

1133 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1134 for datasetType in dataset_composition: 

1135 builder.joinDataset(datasetType, collections, isResult=False) 

1136 return builder.finish() 

1137 

1138 return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions) 

1139 

1140 def queryDimensionRecords( 

1141 self, 

1142 element: Union[DimensionElement, str], 

1143 *, 

1144 dataId: Optional[DataId] = None, 

1145 datasets: Any = None, 

1146 collections: Any = None, 

1147 where: Optional[str] = None, 

1148 components: Optional[bool] = None, 

1149 bind: Optional[Mapping[str, Any]] = None, 

1150 check: bool = True, 

1151 **kwargs: Any, 

1152 ) -> queries.DimensionRecordQueryResults: 

1153 # Docstring inherited from lsst.daf.butler.registry.Registry 

1154 if not isinstance(element, DimensionElement): 

1155 try: 

1156 element = self.dimensions[element] 

1157 except KeyError as e: 

1158 raise DimensionNameError( 

1159 f"No such dimension '{element}', available dimensions: " 

1160 + str(self.dimensions.getStaticElements()) 

1161 ) from e 

1162 dataIds = self.queryDataIds( 

1163 element.graph, 

1164 dataId=dataId, 

1165 datasets=datasets, 

1166 collections=collections, 

1167 where=where, 

1168 components=components, 

1169 bind=bind, 

1170 check=check, 

1171 **kwargs, 

1172 ) 

1173 return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element]) 

1174 

1175 def queryDatasetAssociations( 

1176 self, 

1177 datasetType: Union[str, DatasetType], 

1178 collections: Any = ..., 

1179 *, 

1180 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1181 flattenChains: bool = False, 

1182 ) -> Iterator[DatasetAssociation]: 

1183 # Docstring inherited from lsst.daf.butler.registry.Registry 

1184 if collections is None: 

1185 if not self.defaults.collections: 

1186 raise NoDefaultCollectionError( 

1187 "No collections provided to findDataset, and no defaults from registry construction." 

1188 ) 

1189 collections = self.defaults.collections 

1190 else: 

1191 collections = CollectionQuery.fromExpression(collections) 

1192 TimespanReprClass = self._db.getTimespanRepresentation() 

1193 if isinstance(datasetType, str): 

1194 storage = self._managers.datasets[datasetType] 

1195 else: 

1196 storage = self._managers.datasets[datasetType.name] 

1197 for collectionRecord in collections.iter( 

1198 self._managers.collections, 

1199 collectionTypes=frozenset(collectionTypes), 

1200 flattenChains=flattenChains, 

1201 ): 

1202 query = storage.select(collectionRecord) 

1203 for row in self._db.query(query).mappings(): 

1204 dataId = DataCoordinate.fromRequiredValues( 

1205 storage.datasetType.dimensions, 

1206 tuple(row[name] for name in storage.datasetType.dimensions.required.names), 

1207 ) 

1208 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1209 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False) 

1210 if collectionRecord.type is CollectionType.CALIBRATION: 

1211 timespan = TimespanReprClass.extract(row) 

1212 else: 

1213 timespan = None 

1214 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1215 

1216 storageClasses: StorageClassFactory 

1217 """All storage classes known to the registry (`StorageClassFactory`). 

1218 """