Coverage for python/lsst/daf/butler/registries/sql.py: 13%

482 statements  

coverage.py v6.4.4, created at 2022-08-26 02:22 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28from collections import defaultdict 

29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Set, Tuple, Union 

30 

31import sqlalchemy 

32from lsst.resources import ResourcePathExpression 

33from lsst.utils.iteration import ensure_iterable 

34 

35from ..core import ( 

36 Config, 

37 DataCoordinate, 

38 DataCoordinateIterable, 

39 DataId, 

40 DatasetAssociation, 

41 DatasetId, 

42 DatasetRef, 

43 DatasetType, 

44 Dimension, 

45 DimensionConfig, 

46 DimensionElement, 

47 DimensionGraph, 

48 DimensionRecord, 

49 DimensionUniverse, 

50 NamedKeyMapping, 

51 NameLookupMapping, 

52 Progress, 

53 StorageClassFactory, 

54 Timespan, 

55 ddl, 

56) 

57from ..core.utils import transactional 

58from ..registry import ( 

59 ArgumentError, 

60 CollectionExpressionError, 

61 CollectionSearch, 

62 CollectionType, 

63 CollectionTypeError, 

64 ConflictingDefinitionError, 

65 DataIdValueError, 

66 DatasetTypeError, 

67 DatasetTypeExpressionError, 

68 DimensionNameError, 

69 InconsistentDataIdError, 

70 NoDefaultCollectionError, 

71 OrphanedRecordError, 

72 Registry, 

73 RegistryConfig, 

74 RegistryDefaults, 

75 queries, 

76) 

77from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

78from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

79from ..registry.queries import Query 

80from ..registry.summaries import CollectionSummary 

81from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis 

82 

83if TYPE_CHECKING:  # coverage annotation: branch 83 → 84 never taken (condition was never true)

84 from .._butlerConfig import ButlerConfig 

85 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

86 

87 

88_LOG = logging.getLogger(__name__) 

89 

90 

91class SqlRegistry(Registry): 

92 """Registry implementation based on SQLAlchemy. 

93 

94 Parameters 

95 ---------- 

96 database : `Database` 

97 Database instance to store Registry. 

98 defaults : `RegistryDefaults` 

99 Default collection search path and/or output `~CollectionType.RUN` 

100 collection. 

101 managers : `RegistryManagerInstances` 

102 All the managers required for this registry. 

103 """ 

104 

105 defaultConfigFile: Optional[str] = None 

106 """Path to configuration defaults. Accessed within the ``configs`` resource 

107 or relative to a search path. Can be None if no defaults specified. 

108 """ 

109 

110 @classmethod 

111 def createFromConfig( 

112 cls, 

113 config: Optional[Union[RegistryConfig, str]] = None, 

114 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

115 butlerRoot: Optional[ResourcePathExpression] = None, 

116 ) -> Registry: 

117 """Create registry database and return `SqlRegistry` instance. 

118 

119 This method initializes database contents; the database must be empty 

120 prior to calling this method. 

121 

122 Parameters 

123 ---------- 

124 config : `RegistryConfig` or `str`, optional 

125 Registry configuration, if missing then default configuration will 

126 be loaded from registry.yaml. 

127 dimensionConfig : `DimensionConfig` or `str`, optional 

128 Dimensions configuration, if missing then default configuration 

129 will be loaded from dimensions.yaml. 

130 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

131 Path to the repository root this `SqlRegistry` will manage. 

132 

133 Returns 

134 ------- 

135 registry : `SqlRegistry` 

136 A new `SqlRegistry` instance. 

137 """ 

138 config = cls.forceRegistryConfig(config) 

139 config.replaceRoot(butlerRoot) 

140 

141 if isinstance(dimensionConfig, str): 

142 dimensionConfig = DimensionConfig(dimensionConfig) 

143 elif dimensionConfig is None: 

144 dimensionConfig = DimensionConfig() 

145 elif not isinstance(dimensionConfig, DimensionConfig): 

146 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

147 

148 DatabaseClass = config.getDatabaseClass() 

149 database = DatabaseClass.fromUri( 

150 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

151 ) 

152 managerTypes = RegistryManagerTypes.fromConfig(config) 

153 managers = managerTypes.makeRepo(database, dimensionConfig) 

154 return cls(database, RegistryDefaults(), managers) 

155 
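# Usage sketch (illustrative, not part of the original file): creating a brand
# new registry database from configuration. The paths below are hypothetical.
#
#     from lsst.daf.butler.registries.sql import SqlRegistry
#     registry = SqlRegistry.createFromConfig(
#         config="registry.yaml",             # assumed registry config path
#         dimensionConfig="dimensions.yaml",   # assumed dimension config path
#         butlerRoot="/path/to/new/repo",
#     )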

156 @classmethod 

157 def fromConfig( 

158 cls, 

159 config: Union[ButlerConfig, RegistryConfig, Config, str], 

160 butlerRoot: Optional[ResourcePathExpression] = None, 

161 writeable: bool = True, 

162 defaults: Optional[RegistryDefaults] = None, 

163 ) -> Registry: 

164 """Create `Registry` subclass instance from `config`. 

165 

166 Registry database must be initialized prior to calling this method. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

171 Registry configuration. 

172 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

173 Path to the repository root this `Registry` will manage. 

174 writeable : `bool`, optional 

175 If `True` (default) create a read-write connection to the database. 

176 defaults : `RegistryDefaults`, optional 

177 Default collection search path and/or output `~CollectionType.RUN` 

178 collection. 

179 

180 Returns 

181 ------- 

182 registry : `SqlRegistry` (subclass) 

183 A new `SqlRegistry` subclass instance. 

184 """ 

185 config = cls.forceRegistryConfig(config) 

186 config.replaceRoot(butlerRoot) 

187 DatabaseClass = config.getDatabaseClass() 

188 database = DatabaseClass.fromUri( 

189 str(config.connectionString), 

190 origin=config.get("origin", 0), 

191 namespace=config.get("namespace"), 

192 writeable=writeable, 

193 ) 

194 managerTypes = RegistryManagerTypes.fromConfig(config) 

195 managers = managerTypes.loadRepo(database) 

196 if defaults is None: 

197 defaults = RegistryDefaults() 

198 return cls(database, defaults, managers) 

199 
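# Usage sketch (illustrative only; paths are hypothetical): connecting to an
# already-initialized registry database, optionally read-only.
#
#     registry = SqlRegistry.fromConfig(
#         "registry.yaml",
#         butlerRoot="/path/to/existing/repo",
#         writeable=False,
#     )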

200 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

201 self._db = database 

202 self._managers = managers 

203 self.storageClasses = StorageClassFactory() 

204 # Intentionally invoke property setter to initialize defaults. This 

205 # can only be done after most of the rest of Registry has already been 

206 # initialized, and must be done before the property getter is used. 

207 self.defaults = defaults 

208 # In the future DatasetIdFactory may become configurable and this 

209 # instance will need to be shared with datasets manager. 

210 self.datasetIdFactory = DatasetIdFactory() 

211 

212 def __str__(self) -> str: 

213 return str(self._db) 

214 

215 def __repr__(self) -> str: 

216 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

217 

218 def isWriteable(self) -> bool: 

219 # Docstring inherited from lsst.daf.butler.registry.Registry 

220 return self._db.isWriteable() 

221 

222 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

223 # Docstring inherited from lsst.daf.butler.registry.Registry 

224 if defaults is None: 

225 # No need to copy, because `RegistryDefaults` is immutable; we 

226 # effectively copy on write. 

227 defaults = self.defaults 

228 return type(self)(self._db, defaults, self._managers) 

229 

230 @property 

231 def dimensions(self) -> DimensionUniverse: 

232 # Docstring inherited from lsst.daf.butler.registry.Registry 

233 return self._managers.dimensions.universe 

234 

235 def refresh(self) -> None: 

236 # Docstring inherited from lsst.daf.butler.registry.Registry 

237 self._managers.refresh() 

238 

239 @contextlib.contextmanager 

240 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

241 # Docstring inherited from lsst.daf.butler.registry.Registry 

242 try: 

243 with self._db.transaction(savepoint=savepoint): 

244 yield 

245 except BaseException: 

246 # TODO: this clears the caches sometimes when we wouldn't actually 

247 # need to. Can we avoid that? 

248 self._managers.dimensions.clearCaches() 

249 raise 

250 
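# Usage sketch (illustrative; the collection name is a hypothetical example):
# grouping several registry operations into a single database transaction.
#
#     with registry.transaction(savepoint=True):
#         registry.registerRun("u/example/run")
#         # ... any failure inside this block rolls back the whole transaction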

251 def resetConnectionPool(self) -> None: 

252 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

253 

254 This operation is useful when using the registry with fork-based 

255 multiprocessing. To use the registry across a fork boundary, one must 

256 ensure that there are no currently active connections (no session or 

257 transaction in progress) and that the connection pool is reset using 

258 this method. The child process should call this method immediately 

259 after the fork. 

260 """ 

261 self._db._engine.dispose() 

262 
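# Usage sketch (illustrative): with fork-based multiprocessing, each child
# process should discard inherited connections before using the registry.
#
#     import os
#     pid = os.fork()
#     if pid == 0:          # child process
#         registry.resetConnectionPool()
#         # ... safe to use ``registry`` in the child from here on ...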

263 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

264 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

265 other data repository client. 

266 

267 Opaque table records can be added via `insertOpaqueData`, retrieved via 

268 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

269 

270 Parameters 

271 ---------- 

272 tableName : `str` 

273 Logical name of the opaque table. This may differ from the 

274 actual name used in the database by a prefix and/or suffix. 

275 spec : `ddl.TableSpec` 

276 Specification for the table to be added. 

277 """ 

278 self._managers.opaque.register(tableName, spec) 

279 

280 @transactional 

281 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

282 """Insert records into an opaque table. 

283 

284 Parameters 

285 ---------- 

286 tableName : `str` 

287 Logical name of the opaque table. Must match the name used in a 

288 previous call to `registerOpaqueTable`. 

289 data 

290 Each additional positional argument is a dictionary that represents 

291 a single row to be added. 

292 """ 

293 self._managers.opaque[tableName].insert(*data) 

294 

295 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

296 """Retrieve records from an opaque table. 

297 

298 Parameters 

299 ---------- 

300 tableName : `str` 

301 Logical name of the opaque table. Must match the name used in a 

302 previous call to `registerOpaqueTable`. 

303 where 

304 Additional keyword arguments are interpreted as equality 

305 constraints that restrict the returned rows (combined with AND); 

306 keyword arguments are column names and values are the values they 

307 must have. 

308 

309 Yields 

310 ------ 

311 row : `dict` 

312 A dictionary representing a single result row. 

313 """ 

314 yield from self._managers.opaque[tableName].fetch(**where) 

315 

316 @transactional 

317 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

318 """Remove records from an opaque table. 

319 

320 Parameters 

321 ---------- 

322 tableName : `str` 

323 Logical name of the opaque table. Must match the name used in a 

324 previous call to `registerOpaqueTable`. 

325 where 

326 Additional keyword arguments are interpreted as equality 

327 constraints that restrict the deleted rows (combined with AND); 

328 keyword arguments are column names and values are the values they 

329 must have. 

330 """ 

331 self._managers.opaque[tableName].delete(where.keys(), where) 

332 
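# Usage sketch (illustrative): round-tripping records through an opaque table.
# The table name, field definitions, and values are hypothetical; see
# ``lsst.daf.butler.core.ddl`` for the actual ``TableSpec``/``FieldSpec`` API.
#
#     spec = ddl.TableSpec(fields=[...])   # schema definition (assumed)
#     registry.registerOpaqueTable("example_datastore_records", spec)
#     registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a.fits"})
#     rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
#     registry.deleteOpaqueData("example_datastore_records", dataset_id=1)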

333 def registerCollection( 

334 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

335 ) -> bool: 

336 # Docstring inherited from lsst.daf.butler.registry.Registry 

337 _, registered = self._managers.collections.register(name, type, doc=doc) 

338 return registered 

339 

340 def getCollectionType(self, name: str) -> CollectionType: 

341 # Docstring inherited from lsst.daf.butler.registry.Registry 

342 return self._managers.collections.find(name).type 

343 

344 def _get_collection_record(self, name: str) -> CollectionRecord: 

345 # Docstring inherited from lsst.daf.butler.registry.Registry 

346 return self._managers.collections.find(name) 

347 

348 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

349 # Docstring inherited from lsst.daf.butler.registry.Registry 

350 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

351 return registered 

352 

353 @transactional 

354 def removeCollection(self, name: str) -> None: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 self._managers.collections.remove(name) 

357 

358 def getCollectionChain(self, parent: str) -> CollectionSearch: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 record = self._managers.collections.find(parent) 

361 if record.type is not CollectionType.CHAINED: 

362 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

363 assert isinstance(record, ChainedCollectionRecord) 

364 return record.children 

365 

366 @transactional 

367 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

368 # Docstring inherited from lsst.daf.butler.registry.Registry 

369 record = self._managers.collections.find(parent) 

370 if record.type is not CollectionType.CHAINED: 

371 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

372 assert isinstance(record, ChainedCollectionRecord) 

373 children = CollectionSearch.fromExpression(children) 

374 if children != record.children or flatten: 

375 record.update(self._managers.collections, children, flatten=flatten) 

376 
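# Usage sketch (illustrative; collection names are hypothetical): building a
# CHAINED collection that searches two RUN collections in order.
#
#     registry.registerRun("u/example/run1")
#     registry.registerRun("u/example/run2")
#     registry.registerCollection("u/example/chain", CollectionType.CHAINED)
#     registry.setCollectionChain("u/example/chain", ["u/example/run1", "u/example/run2"])
#     registry.getCollectionChain("u/example/chain")  # -> search order of children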

377 def getCollectionParentChains(self, collection: str) -> Set[str]: 

378 # Docstring inherited from lsst.daf.butler.registry.Registry 

379 return { 

380 record.name 

381 for record in self._managers.collections.getParentChains( 

382 self._managers.collections.find(collection).key 

383 ) 

384 } 

385 

386 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

387 # Docstring inherited from lsst.daf.butler.registry.Registry 

388 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

389 

390 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

391 # Docstring inherited from lsst.daf.butler.registry.Registry 

392 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

393 

394 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

395 # Docstring inherited from lsst.daf.butler.registry.Registry 

396 record = self._managers.collections.find(collection) 

397 return self._managers.datasets.getCollectionSummary(record) 

398 

399 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

400 # Docstring inherited from lsst.daf.butler.registry.Registry 

401 _, inserted = self._managers.datasets.register(datasetType) 

402 return inserted 

403 

404 def removeDatasetType(self, name: str) -> None: 

405 # Docstring inherited from lsst.daf.butler.registry.Registry 

406 self._managers.datasets.remove(name) 

407 

408 def getDatasetType(self, name: str) -> DatasetType: 

409 # Docstring inherited from lsst.daf.butler.registry.Registry 

410 return self._managers.datasets[name].datasetType 

411 

412 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

413 # Docstring inherited from lsst.daf.butler.registry.Registry 

414 return self._managers.datasets.supportsIdGenerationMode(mode) 

415 

416 def findDataset( 

417 self, 

418 datasetType: Union[DatasetType, str], 

419 dataId: Optional[DataId] = None, 

420 *, 

421 collections: Any = None, 

422 timespan: Optional[Timespan] = None, 

423 **kwargs: Any, 

424 ) -> Optional[DatasetRef]: 

425 # Docstring inherited from lsst.daf.butler.registry.Registry 

426 if isinstance(datasetType, DatasetType): 

427 storage = self._managers.datasets[datasetType.name] 

428 else: 

429 storage = self._managers.datasets[datasetType] 

430 dataId = DataCoordinate.standardize( 

431 dataId, 

432 graph=storage.datasetType.dimensions, 

433 universe=self.dimensions, 

434 defaults=self.defaults.dataId, 

435 **kwargs, 

436 ) 

437 if collections is None: 

438 if not self.defaults.collections: 

439 raise NoDefaultCollectionError( 

440 "No collections provided to findDataset, and no defaults from registry construction." 

441 ) 

442 collections = self.defaults.collections 

443 else: 

444 collections = CollectionSearch.fromExpression(collections) 

445 for collectionRecord in collections.iter(self._managers.collections): 

446 if collectionRecord.type is CollectionType.CALIBRATION and ( 

447 not storage.datasetType.isCalibration() or timespan is None 

448 ): 

449 continue 

450 result = storage.find(collectionRecord, dataId, timespan=timespan) 

451 if result is not None: 

452 return result 

453 

454 return None 

455 
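# Usage sketch (illustrative; the dataset type, data ID keys, and collection
# names are hypothetical and depend on the repository's dimension universe):
#
#     ref = registry.findDataset(
#         "calexp",
#         dataId={"instrument": "ExampleCam", "visit": 42, "detector": 1},
#         collections=["u/example/run1"],
#     )
#     # ``ref`` is a DatasetRef, or None if no matching dataset was found.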

456 @transactional 

457 def insertDatasets( 

458 self, 

459 datasetType: Union[DatasetType, str], 

460 dataIds: Iterable[DataId], 

461 run: Optional[str] = None, 

462 expand: bool = True, 

463 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

464 ) -> List[DatasetRef]: 

465 # Docstring inherited from lsst.daf.butler.registry.Registry 

466 if isinstance(datasetType, DatasetType): 

467 storage = self._managers.datasets.find(datasetType.name) 

468 if storage is None: 

469 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

470 else: 

471 storage = self._managers.datasets.find(datasetType) 

472 if storage is None: 

473 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

474 if run is None: 

475 if self.defaults.run is None: 

476 raise NoDefaultCollectionError( 

477 "No run provided to insertDatasets, and no default from registry construction." 

478 ) 

479 run = self.defaults.run 

480 runRecord = self._managers.collections.find(run) 

481 if runRecord.type is not CollectionType.RUN: 

482 raise CollectionTypeError( 

483 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

484 ) 

485 assert isinstance(runRecord, RunRecord) 

486 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

487 if expand: 

488 expandedDataIds = [ 

489 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

490 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

491 ] 

492 else: 

493 expandedDataIds = [ 

494 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

495 ] 

496 try: 

497 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

498 except sqlalchemy.exc.IntegrityError as err: 

499 raise ConflictingDefinitionError( 

500 f"A database constraint failure was triggered by inserting " 

501 f"one or more datasets of type {storage.datasetType} into " 

502 f"collection '{run}'. " 

503 f"This probably means a dataset with the same data ID " 

504 f"and dataset type already exists, but it may also mean a " 

505 f"dimension row is missing." 

506 ) from err 

507 return refs 

508 

509 @transactional 

510 def _importDatasets( 

511 self, 

512 datasets: Iterable[DatasetRef], 

513 expand: bool = True, 

514 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

515 reuseIds: bool = False, 

516 ) -> List[DatasetRef]: 

517 # Docstring inherited from lsst.daf.butler.registry.Registry 

518 datasets = list(datasets) 

519 if not datasets: 

520 # nothing to do 

521 return [] 

522 

523 # find dataset type 

524 datasetTypes = set(dataset.datasetType for dataset in datasets) 

525 if len(datasetTypes) != 1: 

526 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

527 datasetType = datasetTypes.pop() 

528 

529 # get storage handler for this dataset type 

530 storage = self._managers.datasets.find(datasetType.name) 

531 if storage is None: 

532 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

533 

534 # find run name 

535 runs = set(dataset.run for dataset in datasets) 

536 if len(runs) != 1: 

537 raise ValueError(f"Multiple run names in input datasets: {runs}") 

538 run = runs.pop() 

539 if run is None: 

540 if self.defaults.run is None: 

541 raise NoDefaultCollectionError( 

542 "No run provided to ingestDatasets, and no default from registry construction." 

543 ) 

544 run = self.defaults.run 

545 

546 runRecord = self._managers.collections.find(run) 

547 if runRecord.type is not CollectionType.RUN: 

548 raise CollectionTypeError( 

549 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

550 " RUN collection required." 

551 ) 

552 assert isinstance(runRecord, RunRecord) 

553 

554 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

555 if expand: 

556 expandedDatasets = [ 

557 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

558 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

559 ] 

560 else: 

561 expandedDatasets = [ 

562 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

563 for dataset in datasets 

564 ] 

565 

566 try: 

567 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

568 except sqlalchemy.exc.IntegrityError as err: 

569 raise ConflictingDefinitionError( 

570 f"A database constraint failure was triggered by inserting " 

571 f"one or more datasets of type {storage.datasetType} into " 

572 f"collection '{run}'. " 

573 f"This probably means a dataset with the same data ID " 

574 f"and dataset type already exists, but it may also mean a " 

575 f"dimension row is missing." 

576 ) from err 

577 return refs 

578 

579 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

580 # Docstring inherited from lsst.daf.butler.registry.Registry 

581 return self._managers.datasets.getDatasetRef(id) 

582 

583 @transactional 

584 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

585 # Docstring inherited from lsst.daf.butler.registry.Registry 

586 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

587 for datasetType, refsForType in progress.iter_item_chunks( 

588 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

589 ): 

590 storage = self._managers.datasets[datasetType.name] 

591 try: 

592 storage.delete(refsForType) 

593 except sqlalchemy.exc.IntegrityError as err: 

594 raise OrphanedRecordError( 

595 "One or more datasets is still present in one or more Datastores." 

596 ) from err 

597 

598 @transactional 

599 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

600 # Docstring inherited from lsst.daf.butler.registry.Registry 

601 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

602 collectionRecord = self._managers.collections.find(collection) 

603 if collectionRecord.type is not CollectionType.TAGGED: 

604 raise CollectionTypeError( 

605 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

606 ) 

607 for datasetType, refsForType in progress.iter_item_chunks( 

608 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

609 ): 

610 storage = self._managers.datasets[datasetType.name] 

611 try: 

612 storage.associate(collectionRecord, refsForType) 

613 except sqlalchemy.exc.IntegrityError as err: 

614 raise ConflictingDefinitionError( 

615 f"Constraint violation while associating dataset of type {datasetType.name} with " 

616 f"collection {collection}. This probably means that one or more datasets with the same " 

617 f"dataset type and data ID already exist in the collection, but it may also indicate " 

618 f"that the datasets do not exist." 

619 ) from err 

620 

621 @transactional 

622 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

623 # Docstring inherited from lsst.daf.butler.registry.Registry 

624 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

625 collectionRecord = self._managers.collections.find(collection) 

626 if collectionRecord.type is not CollectionType.TAGGED: 

627 raise CollectionTypeError( 

628 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

629 ) 

630 for datasetType, refsForType in progress.iter_item_chunks( 

631 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

632 ): 

633 storage = self._managers.datasets[datasetType.name] 

634 storage.disassociate(collectionRecord, refsForType) 

635 

636 @transactional 

637 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

638 # Docstring inherited from lsst.daf.butler.registry.Registry 

639 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

640 collectionRecord = self._managers.collections.find(collection) 

641 for datasetType, refsForType in progress.iter_item_chunks( 

642 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

643 ): 

644 storage = self._managers.datasets[datasetType.name] 

645 storage.certify(collectionRecord, refsForType, timespan) 

646 

647 @transactional 

648 def decertify( 

649 self, 

650 collection: str, 

651 datasetType: Union[str, DatasetType], 

652 timespan: Timespan, 

653 *, 

654 dataIds: Optional[Iterable[DataId]] = None, 

655 ) -> None: 

656 # Docstring inherited from lsst.daf.butler.registry.Registry 

657 collectionRecord = self._managers.collections.find(collection) 

658 if isinstance(datasetType, str): 

659 storage = self._managers.datasets[datasetType] 

660 else: 

661 storage = self._managers.datasets[datasetType.name] 

662 standardizedDataIds = None 

663 if dataIds is not None: 

664 standardizedDataIds = [ 

665 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

666 ] 

667 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

668 

669 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

670 """Return an object that allows a new `Datastore` instance to 

671 communicate with this `Registry`. 

672 

673 Returns 

674 ------- 

675 manager : `DatastoreRegistryBridgeManager` 

676 Object that mediates communication between this `Registry` and its 

677 associated datastores. 

678 """ 

679 return self._managers.datastores 

680 

681 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

682 # Docstring inherited from lsst.daf.butler.registry.Registry 

683 return self._managers.datastores.findDatastores(ref) 

684 

685 def expandDataId( 

686 self, 

687 dataId: Optional[DataId] = None, 

688 *, 

689 graph: Optional[DimensionGraph] = None, 

690 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

691 withDefaults: bool = True, 

692 **kwargs: Any, 

693 ) -> DataCoordinate: 

694 # Docstring inherited from lsst.daf.butler.registry.Registry 

695 if not withDefaults: 

696 defaults = None 

697 else: 

698 defaults = self.defaults.dataId 

699 try: 

700 standardized = DataCoordinate.standardize( 

701 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

702 ) 

703 except KeyError as exc: 

704 # This means either kwargs have some odd name or required 

705 # dimension is missing. 

706 raise DimensionNameError(str(exc)) from exc 

707 if standardized.hasRecords(): 

708 return standardized 

709 if records is None: 

710 records = {} 

711 elif isinstance(records, NamedKeyMapping): 

712 records = records.byName() 

713 else: 

714 records = dict(records) 

715 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

716 records.update(dataId.records.byName()) 

717 keys = standardized.byName() 

718 for element in standardized.graph.primaryKeyTraversalOrder: 

719 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

720 if record is ...: 

721 if isinstance(element, Dimension) and keys.get(element.name) is None: 

722 if element in standardized.graph.required: 

723 raise DimensionNameError( 

724 f"No value or null value for required dimension {element.name}." 

725 ) 

726 keys[element.name] = None 

727 record = None 

728 else: 

729 storage = self._managers.dimensions[element] 

730 dataIdSet = DataCoordinateIterable.fromScalar( 

731 DataCoordinate.standardize(keys, graph=element.graph) 

732 ) 

733 fetched = tuple(storage.fetch(dataIdSet)) 

734 try: 

735 (record,) = fetched 

736 except ValueError: 

737 record = None 

738 records[element.name] = record 

739 if record is not None: 

740 for d in element.implied: 

741 value = getattr(record, d.name) 

742 if keys.setdefault(d.name, value) != value: 

743 raise InconsistentDataIdError( 

744 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

745 f"but {element.name} implies {d.name}={value!r}." 

746 ) 

747 else: 

748 if element in standardized.graph.required: 

749 raise DataIdValueError( 

750 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

751 ) 

752 if element.alwaysJoin: 

753 raise InconsistentDataIdError( 

754 f"Could not fetch record for element {element.name} via keys {keys}, ", 

755 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

756 "related.", 

757 ) 

758 for d in element.implied: 

759 keys.setdefault(d.name, None) 

760 records.setdefault(d.name, None) 

761 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

762 

763 def insertDimensionData( 

764 self, 

765 element: Union[DimensionElement, str], 

766 *data: Union[Mapping[str, Any], DimensionRecord], 

767 conform: bool = True, 

768 replace: bool = False, 

769 skip_existing: bool = False, 

770 ) -> None: 

771 # Docstring inherited from lsst.daf.butler.registry.Registry 

772 if conform: 

773 if isinstance(element, str): 

774 element = self.dimensions[element] 

775 records = [ 

776 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

777 ] 

778 else: 

779 # Ignore typing since caller said to trust them with conform=False. 

780 records = data # type: ignore 

781 storage = self._managers.dimensions[element] # type: ignore 

782 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

783 

784 def syncDimensionData( 

785 self, 

786 element: Union[DimensionElement, str], 

787 row: Union[Mapping[str, Any], DimensionRecord], 

788 conform: bool = True, 

789 update: bool = False, 

790 ) -> Union[bool, Dict[str, Any]]: 

791 # Docstring inherited from lsst.daf.butler.registry.Registry 

792 if conform: 

793 if isinstance(element, str): 

794 element = self.dimensions[element] 

795 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

796 else: 

797 # Ignore typing since caller said to trust them with conform=False. 

798 record = row # type: ignore 

799 storage = self._managers.dimensions[element] # type: ignore 

800 return storage.sync(record, update=update) 

801 
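# Usage sketch (illustrative; the dimension name and record fields are
# hypothetical and must match the repository's dimension configuration):
#
#     registry.insertDimensionData("instrument", {"name": "ExampleCam", "detector_max": 4})
#     # ``syncDimensionData`` is idempotent: it inserts the row if missing and
#     # reports (or applies, with update=True) any differences otherwise.
#     registry.syncDimensionData("instrument", {"name": "ExampleCam", "detector_max": 4})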

802 def queryDatasetTypes( 

803 self, 

804 expression: Any = ..., 

805 *, 

806 components: Optional[bool] = None, 

807 missing: Optional[List[str]] = None, 

808 ) -> Iterator[DatasetType]: 

809 # Docstring inherited from lsst.daf.butler.registry.Registry 

810 try: 

811 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

812 except TypeError as exc: 

813 raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc 

814 unknownComponentsMessage = ( 

815 "Could not find definition for storage class %s for dataset type %r;" 

816 " if it has components they will not be included in dataset type query results." 

817 ) 

818 if wildcard is Ellipsis: 

819 for datasetType in self._managers.datasets: 

820 # The dataset type can no longer be a component 

821 yield datasetType 

822 if components: 

823 # Automatically create the component dataset types 

824 try: 

825 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

826 except KeyError as err: 

827 _LOG.warning(unknownComponentsMessage, err, datasetType.name) 

828 else: 

829 yield from componentsForDatasetType 

830 return 

831 done: Set[str] = set() 

832 for name in wildcard.strings: 

833 storage = self._managers.datasets.find(name) 

834 done.add(name) 

835 if storage is None: 

836 if missing is not None: 

837 missing.append(name) 

838 else: 

839 yield storage.datasetType 

840 if wildcard.patterns: 

841 # If components (the argument) is None, we'll save component 

842 # datasets that we might want to match, but only if their parents 

843 # didn't get included. 

844 componentsForLater = [] 

845 for registeredDatasetType in self._managers.datasets: 

846 # Components are not stored in registry so expand them here 

847 allDatasetTypes = [registeredDatasetType] 

848 if components is not False: 

849 # Only check for the components if we are being asked 

850 # for components or components is None. 

851 try: 

852 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

853 except KeyError as err: 

854 _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name) 

855 for datasetType in allDatasetTypes: 

856 if datasetType.name in done: 

857 continue 

858 parentName, componentName = datasetType.nameAndComponent() 

859 if componentName is not None and not components: 

860 if components is None and parentName not in done: 

861 componentsForLater.append(datasetType) 

862 continue 

863 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

864 done.add(datasetType.name) 

865 yield datasetType 

866 # Go back and try to match saved components. 

867 for datasetType in componentsForLater: 

868 parentName, _ = datasetType.nameAndComponent() 

869 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

870 yield datasetType 

871 

872 def queryCollections( 

873 self, 

874 expression: Any = ..., 

875 datasetType: Optional[DatasetType] = None, 

876 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

877 flattenChains: bool = False, 

878 includeChains: Optional[bool] = None, 

879 ) -> Iterator[str]: 

880 # Docstring inherited from lsst.daf.butler.registry.Registry 

881 

882 # Right now the datasetTypes argument is completely ignored, but that 

883 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

884 # ticket will take care of that. 

885 try: 

886 query = CollectionQuery.fromExpression(expression) 

887 except TypeError as exc: 

888 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

889 collectionTypes = ensure_iterable(collectionTypes) 

890 for record in query.iter( 

891 self._managers.collections, 

892 collectionTypes=frozenset(collectionTypes), 

893 flattenChains=flattenChains, 

894 includeChains=includeChains, 

895 ): 

896 yield record.name 

897 

898 def _makeQueryBuilder( 

899 self, summary: queries.QuerySummary, doomed_by: Iterable[str] = () 

900 ) -> queries.QueryBuilder: 

901 """Return a `QueryBuilder` instance capable of constructing and 

902 managing more complex queries than those obtainable via `Registry` 

903 interfaces. 

904 

905 This is an advanced interface; downstream code should prefer 

906 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

907 are sufficient. 

908 

909 Parameters 

910 ---------- 

911 summary : `queries.QuerySummary` 

912 Object describing and categorizing the full set of dimensions that 

913 will be included in the query. 

914 doomed_by : `Iterable` of `str`, optional 

915 A list of diagnostic messages that indicate why the query is going 

916 to yield no results and should not even be executed. If an empty 

917 container (default) the query will be executed unless other code 

918 determines that it is doomed. 

919 

920 Returns 

921 ------- 

922 builder : `queries.QueryBuilder` 

923 Object that can be used to construct and perform advanced queries. 

924 """ 

925 return queries.QueryBuilder( 

926 summary, 

927 queries.RegistryManagers( 

928 collections=self._managers.collections, 

929 dimensions=self._managers.dimensions, 

930 datasets=self._managers.datasets, 

931 TimespanReprClass=self._db.getTimespanRepresentation(), 

932 ), 

933 doomed_by=doomed_by, 

934 ) 

935 

936 def queryDatasets( 

937 self, 

938 datasetType: Any, 

939 *, 

940 collections: Any = None, 

941 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

942 dataId: Optional[DataId] = None, 

943 where: Optional[str] = None, 

944 findFirst: bool = False, 

945 components: Optional[bool] = None, 

946 bind: Optional[Mapping[str, Any]] = None, 

947 check: bool = True, 

948 **kwargs: Any, 

949 ) -> queries.DatasetQueryResults: 

950 # Docstring inherited from lsst.daf.butler.registry.Registry 

951 

952 # Standardize the collections expression. 

953 if collections is None: 

954 if not self.defaults.collections: 

955 raise NoDefaultCollectionError( 

956 "No collections provided to findDataset, and no defaults from registry construction." 

957 ) 

958 collections = self.defaults.collections 

959 elif findFirst: 

960 collections = CollectionSearch.fromExpression(collections) 

961 else: 

962 collections = CollectionQuery.fromExpression(collections) 

963 # Standardize and expand the data ID provided as a constraint. 

964 standardizedDataId = self.expandDataId(dataId, **kwargs) 

965 

966 # We can only query directly if given a non-component DatasetType 

967 # instance. If we were given an expression or str or a component 

968 # DatasetType instance, we'll populate this dict, recurse, and return. 

969 # If we already have a non-component DatasetType, it will remain None 

970 # and we'll run the query directly. 

971 composition: Optional[ 

972 Dict[ 

973 DatasetType, List[Optional[str]] # parent dataset type # component name, or None for parent 

974 ] 

975 ] = None 

976 if not isinstance(datasetType, DatasetType): 

977 # We were given a dataset type expression (which may be as simple 

978 # as a str). Loop over all matching datasets, delegating handling 

979 # of the `components` argument to queryDatasetTypes, as we populate 

980 # the composition dict. 

981 composition = defaultdict(list) 

982 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

983 parentName, componentName = trueDatasetType.nameAndComponent() 

984 if componentName is not None: 

985 parentDatasetType = self.getDatasetType(parentName) 

986 composition.setdefault(parentDatasetType, []).append(componentName) 

987 else: 

988 composition.setdefault(trueDatasetType, []).append(None) 

989 if not composition: 

990 return queries.ChainedDatasetQueryResults( 

991 [], 

992 doomed_by=[ 

993 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

994 "exist in any collection." 

995 for t in ensure_iterable(datasetType) 

996 ], 

997 ) 

998 elif datasetType.isComponent(): 

999 # We were given a true DatasetType instance, but it's a component. 

1000 # the composition dict will have exactly one item. 

1001 parentName, componentName = datasetType.nameAndComponent() 

1002 parentDatasetType = self.getDatasetType(parentName) 

1003 composition = {parentDatasetType: [componentName]} 

1004 if composition is not None: 

1005 # We need to recurse. Do that once for each parent dataset type. 

1006 chain = [] 

1007 for parentDatasetType, componentNames in composition.items(): 

1008 parentResults = self.queryDatasets( 

1009 parentDatasetType, 

1010 collections=collections, 

1011 dimensions=dimensions, 

1012 dataId=standardizedDataId, 

1013 where=where, 

1014 bind=bind, 

1015 findFirst=findFirst, 

1016 check=check, 

1017 ) 

1018 assert isinstance( 

1019 parentResults, queries.ParentDatasetQueryResults 

1020 ), "Should always be true if passing in a DatasetType instance, and we are." 

1021 chain.append(parentResults.withComponents(componentNames)) 

1022 return queries.ChainedDatasetQueryResults(chain) 

1023 # If we get here, there's no need to recurse (or we are already 

1024 # recursing; there can only ever be one level of recursion). 

1025 

1026 # The full set of dimensions in the query is the combination of those 

1027 # needed for the DatasetType and those explicitly requested, if any. 

1028 requestedDimensionNames = set(datasetType.dimensions.names) 

1029 if dimensions is not None: 

1030 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1031 # Construct the summary structure needed to construct a QueryBuilder. 

1032 summary = queries.QuerySummary( 

1033 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1034 dataId=standardizedDataId, 

1035 expression=where, 

1036 bind=bind, 

1037 defaults=self.defaults.dataId, 

1038 check=check, 

1039 datasets=[datasetType], 

1040 ) 

1041 builder = self._makeQueryBuilder(summary) 

1042 # Add the dataset subquery to the query, telling the QueryBuilder to 

1043 # include the rank of the selected collection in the results only if we 

1044 # need to findFirst. Note that if any of the collections are 

1045 # actually wildcard expressions, and we've asked for deduplication, 

1046 # this will raise TypeError for us. 

1047 builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst) 

1048 query = builder.finish() 

1049 return queries.ParentDatasetQueryResults(self._db, query, components=[None], datasetType=datasetType) 

1050 
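# Usage sketch (illustrative; the dataset type, collections, and ``where``
# expression are hypothetical):
#
#     refs = registry.queryDatasets(
#         "calexp",
#         collections=["u/example/run1", "u/example/run2"],
#         where="instrument = 'ExampleCam' AND visit > 100",
#         findFirst=True,   # only the first match per data ID in search order
#     )
#     for ref in refs:
#         ...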

1051 def queryDataIds( 

1052 self, 

1053 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1054 *, 

1055 dataId: Optional[DataId] = None, 

1056 datasets: Any = None, 

1057 collections: Any = None, 

1058 where: Optional[str] = None, 

1059 components: Optional[bool] = None, 

1060 bind: Optional[Mapping[str, Any]] = None, 

1061 check: bool = True, 

1062 **kwargs: Any, 

1063 ) -> queries.DataCoordinateQueryResults: 

1064 # Docstring inherited from lsst.daf.butler.registry.Registry 

1065 dimensions = ensure_iterable(dimensions) 

1066 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1067 standardizedDatasetTypes = set() 

1068 requestedDimensions = self.dimensions.extract(dimensions) 

1069 missing: List[str] = [] 

1070 if datasets is not None: 

1071 if not collections: 

1072 if not self.defaults.collections: 

1073 raise NoDefaultCollectionError( 

1074 f"Cannot pass 'datasets' (='{datasets}') without 'collections'." 

1075 ) 

1076 collections = self.defaults.collections 

1077 else: 

1078 # Preprocess collections expression in case the original 

1079 # included single-pass iterators (we'll want to use it multiple 

1080 # times below). 

1081 collections = CollectionQuery.fromExpression(collections) 

1082 for datasetType in self.queryDatasetTypes(datasets, components=components, missing=missing): 

1083 # If any matched dataset type is a component, just operate on 

1084 # its parent instead, because Registry doesn't know anything 

1085 # about what components exist, and here (unlike queryDatasets) 

1086 # we don't care about returning them. 

1087 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1088 if componentName is not None: 

1089 datasetType = self.getDatasetType(parentDatasetTypeName) 

1090 standardizedDatasetTypes.add(datasetType) 

1091 elif collections: 

1092 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1093 

1094 def query_factory( 

1095 order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None 

1096 ) -> Query: 

1097 """Construct the Query object that generates query results.""" 

1098 summary = queries.QuerySummary( 

1099 requested=requestedDimensions, 

1100 dataId=standardizedDataId, 

1101 expression=where, 

1102 bind=bind, 

1103 defaults=self.defaults.dataId, 

1104 check=check, 

1105 datasets=standardizedDatasetTypes, 

1106 order_by=order_by, 

1107 limit=limit, 

1108 ) 

1109 builder = self._makeQueryBuilder( 

1110 summary, doomed_by=[f"Dataset type {name} is not registered." for name in missing] 

1111 ) 

1112 for datasetType in standardizedDatasetTypes: 

1113 builder.joinDataset( 

1114 datasetType, 

1115 collections, 

1116 isResult=False, 

1117 ) 

1118 return builder.finish() 

1119 

1120 return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions) 

1121 
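# Usage sketch (illustrative; dimension names and constraints are hypothetical):
#
#     dataIds = registry.queryDataIds(
#         ["visit", "detector"],
#         datasets="calexp",
#         collections=["u/example/run1"],
#         where="instrument = 'ExampleCam'",
#     )
#     # Iterating yields DataCoordinate objects for the matching data IDs.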

1122 def queryDimensionRecords( 

1123 self, 

1124 element: Union[DimensionElement, str], 

1125 *, 

1126 dataId: Optional[DataId] = None, 

1127 datasets: Any = None, 

1128 collections: Any = None, 

1129 where: Optional[str] = None, 

1130 components: Optional[bool] = None, 

1131 bind: Optional[Mapping[str, Any]] = None, 

1132 check: bool = True, 

1133 **kwargs: Any, 

1134 ) -> queries.DimensionRecordQueryResults: 

1135 # Docstring inherited from lsst.daf.butler.registry.Registry 

1136 if not isinstance(element, DimensionElement): 

1137 try: 

1138 element = self.dimensions[element] 

1139 except KeyError as e: 

1140 raise DimensionNameError( 

1141 f"No such dimension '{element}', available dimensions: " 

1142 + str(self.dimensions.getStaticElements()) 

1143 ) from e 

1144 dataIds = self.queryDataIds( 

1145 element.graph, 

1146 dataId=dataId, 

1147 datasets=datasets, 

1148 collections=collections, 

1149 where=where, 

1150 components=components, 

1151 bind=bind, 

1152 check=check, 

1153 **kwargs, 

1154 ) 

1155 return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element]) 

1156 

1157 def queryDatasetAssociations( 

1158 self, 

1159 datasetType: Union[str, DatasetType], 

1160 collections: Any = ..., 

1161 *, 

1162 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1163 flattenChains: bool = False, 

1164 ) -> Iterator[DatasetAssociation]: 

1165 # Docstring inherited from lsst.daf.butler.registry.Registry 

1166 if collections is None: 

1167 if not self.defaults.collections: 

1168 raise NoDefaultCollectionError( 

1169 "No collections provided to findDataset, and no defaults from registry construction." 

1170 ) 

1171 collections = self.defaults.collections 

1172 else: 

1173 collections = CollectionQuery.fromExpression(collections) 

1174 TimespanReprClass = self._db.getTimespanRepresentation() 

1175 if isinstance(datasetType, str): 

1176 storage = self._managers.datasets[datasetType] 

1177 else: 

1178 storage = self._managers.datasets[datasetType.name] 

1179 for collectionRecord in collections.iter( 

1180 self._managers.collections, 

1181 collectionTypes=frozenset(collectionTypes), 

1182 flattenChains=flattenChains, 

1183 ): 

1184 query = storage.select(collectionRecord) 

1185 for row in self._db.query(query).mappings(): 

1186 dataId = DataCoordinate.fromRequiredValues( 

1187 storage.datasetType.dimensions, 

1188 tuple(row[name] for name in storage.datasetType.dimensions.required.names), 

1189 ) 

1190 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1191 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False) 

1192 if collectionRecord.type is CollectionType.CALIBRATION: 

1193 timespan = TimespanReprClass.extract(row) 

1194 else: 

1195 timespan = None 

1196 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1197 

1198 storageClasses: StorageClassFactory 

1199 """All storage classes known to the registry (`StorageClassFactory`). 

1200 """