Coverage for python/lsst/daf/butler/registries/sql.py: 13%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

481 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28from collections import defaultdict 

29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Set, Tuple, Union 

30 

31import sqlalchemy 

32from lsst.resources import ResourcePathExpression 

33from lsst.utils.iteration import ensure_iterable 

34 

35from ..core import ( 

36 Config, 

37 DataCoordinate, 

38 DataCoordinateIterable, 

39 DataId, 

40 DatasetAssociation, 

41 DatasetId, 

42 DatasetRef, 

43 DatasetType, 

44 Dimension, 

45 DimensionConfig, 

46 DimensionElement, 

47 DimensionGraph, 

48 DimensionRecord, 

49 DimensionUniverse, 

50 NamedKeyMapping, 

51 NameLookupMapping, 

52 Progress, 

53 StorageClassFactory, 

54 Timespan, 

55 ddl, 

56) 

57from ..core.utils import transactional 

58from ..registry import ( 

59 ArgumentError, 

60 CollectionExpressionError, 

61 CollectionSearch, 

62 CollectionType, 

63 CollectionTypeError, 

64 ConflictingDefinitionError, 

65 DataIdValueError, 

66 DatasetTypeError, 

67 DatasetTypeExpressionError, 

68 DimensionNameError, 

69 InconsistentDataIdError, 

70 NoDefaultCollectionError, 

71 OrphanedRecordError, 

72 Registry, 

73 RegistryConfig, 

74 RegistryDefaults, 

75 queries, 

76) 

77from ..registry.interfaces import ChainedCollectionRecord, DatasetIdGenEnum, RunRecord 

78from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

79from ..registry.queries import Query 

80from ..registry.summaries import CollectionSummary 

81from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis 

82 

83if TYPE_CHECKING: 83 ↛ 84line 83 didn't jump to line 84, because the condition on line 83 was never true

84 from .._butlerConfig import ButlerConfig 

85 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

86 

87 

88_LOG = logging.getLogger(__name__) 

89 

90 

91class SqlRegistry(Registry): 

92 """Registry implementation based on SQLAlchemy. 

93 

94 Parameters 

95 ---------- 

96 database : `Database` 

97 Database instance to store Registry. 

98 defaults : `RegistryDefaults` 

99 Default collection search path and/or output `~CollectionType.RUN` 

100 collection. 

101 managers : `RegistryManagerInstances` 

102 All the managers required for this registry. 

103 """ 

104 

105 defaultConfigFile: Optional[str] = None 

106 """Path to configuration defaults. Accessed within the ``configs`` resource 

107 or relative to a search path. Can be None if no defaults specified. 

108 """ 

109 

110 @classmethod 

111 def createFromConfig( 

112 cls, 

113 config: Optional[Union[RegistryConfig, str]] = None, 

114 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

115 butlerRoot: Optional[ResourcePathExpression] = None, 

116 ) -> Registry: 

117 """Create registry database and return `SqlRegistry` instance. 

118 

119 This method initializes database contents, database must be empty 

120 prior to calling this method. 

121 

122 Parameters 

123 ---------- 

124 config : `RegistryConfig` or `str`, optional 

125 Registry configuration, if missing then default configuration will 

126 be loaded from registry.yaml. 

127 dimensionConfig : `DimensionConfig` or `str`, optional 

128 Dimensions configuration, if missing then default configuration 

129 will be loaded from dimensions.yaml. 

130 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

131 Path to the repository root this `SqlRegistry` will manage. 

132 

133 Returns 

134 ------- 

135 registry : `SqlRegistry` 

136 A new `SqlRegistry` instance. 

137 """ 

138 config = cls.forceRegistryConfig(config) 

139 config.replaceRoot(butlerRoot) 

140 

141 if isinstance(dimensionConfig, str): 

142 dimensionConfig = DimensionConfig(config) 

143 elif dimensionConfig is None: 

144 dimensionConfig = DimensionConfig() 

145 elif not isinstance(dimensionConfig, DimensionConfig): 

146 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

147 

148 DatabaseClass = config.getDatabaseClass() 

149 database = DatabaseClass.fromUri( 

150 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

151 ) 

152 managerTypes = RegistryManagerTypes.fromConfig(config) 

153 managers = managerTypes.makeRepo(database, dimensionConfig) 

154 return cls(database, RegistryDefaults(), managers) 

155 

156 @classmethod 

157 def fromConfig( 

158 cls, 

159 config: Union[ButlerConfig, RegistryConfig, Config, str], 

160 butlerRoot: Optional[ResourcePathExpression] = None, 

161 writeable: bool = True, 

162 defaults: Optional[RegistryDefaults] = None, 

163 ) -> Registry: 

164 """Create `Registry` subclass instance from `config`. 

165 

166 Registry database must be initialized prior to calling this method. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

171 Registry configuration 

172 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

173 Path to the repository root this `Registry` will manage. 

174 writeable : `bool`, optional 

175 If `True` (default) create a read-write connection to the database. 

176 defaults : `RegistryDefaults`, optional 

177 Default collection search path and/or output `~CollectionType.RUN` 

178 collection. 

179 

180 Returns 

181 ------- 

182 registry : `SqlRegistry` (subclass) 

183 A new `SqlRegistry` subclass instance. 

184 """ 

185 config = cls.forceRegistryConfig(config) 

186 config.replaceRoot(butlerRoot) 

187 DatabaseClass = config.getDatabaseClass() 

188 database = DatabaseClass.fromUri( 

189 str(config.connectionString), 

190 origin=config.get("origin", 0), 

191 namespace=config.get("namespace"), 

192 writeable=writeable, 

193 ) 

194 managerTypes = RegistryManagerTypes.fromConfig(config) 

195 managers = managerTypes.loadRepo(database) 

196 if defaults is None: 

197 defaults = RegistryDefaults() 

198 return cls(database, defaults, managers) 

199 

200 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

201 self._db = database 

202 self._managers = managers 

203 self.storageClasses = StorageClassFactory() 

204 # Intentionally invoke property setter to initialize defaults. This 

205 # can only be done after most of the rest of Registry has already been 

206 # initialized, and must be done before the property getter is used. 

207 self.defaults = defaults 

208 

209 def __str__(self) -> str: 

210 return str(self._db) 

211 

212 def __repr__(self) -> str: 

213 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

214 

215 def isWriteable(self) -> bool: 

216 # Docstring inherited from lsst.daf.butler.registry.Registry 

217 return self._db.isWriteable() 

218 

219 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

220 # Docstring inherited from lsst.daf.butler.registry.Registry 

221 if defaults is None: 

222 # No need to copy, because `RegistryDefaults` is immutable; we 

223 # effectively copy on write. 

224 defaults = self.defaults 

225 return type(self)(self._db, defaults, self._managers) 

226 

227 @property 

228 def dimensions(self) -> DimensionUniverse: 

229 # Docstring inherited from lsst.daf.butler.registry.Registry 

230 return self._managers.dimensions.universe 

231 

232 def refresh(self) -> None: 

233 # Docstring inherited from lsst.daf.butler.registry.Registry 

234 self._managers.refresh() 

235 

236 @contextlib.contextmanager 

237 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

238 # Docstring inherited from lsst.daf.butler.registry.Registry 

239 try: 

240 with self._db.transaction(savepoint=savepoint): 

241 yield 

242 except BaseException: 

243 # TODO: this clears the caches sometimes when we wouldn't actually 

244 # need to. Can we avoid that? 

245 self._managers.dimensions.clearCaches() 

246 raise 

247 

248 def resetConnectionPool(self) -> None: 

249 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

250 

251 This operation is useful when using registry with fork-based 

252 multiprocessing. To use registry across fork boundary one has to make 

253 sure that there are no currently active connections (no session or 

254 transaction is in progress) and connection pool is reset using this 

255 method. This method should be called by the child process immediately 

256 after the fork. 

257 """ 

258 self._db._engine.dispose() 

259 

260 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

261 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

262 other data repository client. 

263 

264 Opaque table records can be added via `insertOpaqueData`, retrieved via 

265 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

266 

267 Parameters 

268 ---------- 

269 tableName : `str` 

270 Logical name of the opaque table. This may differ from the 

271 actual name used in the database by a prefix and/or suffix. 

272 spec : `ddl.TableSpec` 

273 Specification for the table to be added. 

274 """ 

275 self._managers.opaque.register(tableName, spec) 

276 

277 @transactional 

278 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

279 """Insert records into an opaque table. 

280 

281 Parameters 

282 ---------- 

283 tableName : `str` 

284 Logical name of the opaque table. Must match the name used in a 

285 previous call to `registerOpaqueTable`. 

286 data 

287 Each additional positional argument is a dictionary that represents 

288 a single row to be added. 

289 """ 

290 self._managers.opaque[tableName].insert(*data) 

291 

292 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

293 """Retrieve records from an opaque table. 

294 

295 Parameters 

296 ---------- 

297 tableName : `str` 

298 Logical name of the opaque table. Must match the name used in a 

299 previous call to `registerOpaqueTable`. 

300 where 

301 Additional keyword arguments are interpreted as equality 

302 constraints that restrict the returned rows (combined with AND); 

303 keyword arguments are column names and values are the values they 

304 must have. 

305 

306 Yields 

307 ------ 

308 row : `dict` 

309 A dictionary representing a single result row. 

310 """ 

311 yield from self._managers.opaque[tableName].fetch(**where) 

312 

313 @transactional 

314 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

315 """Remove records from an opaque table. 

316 

317 Parameters 

318 ---------- 

319 tableName : `str` 

320 Logical name of the opaque table. Must match the name used in a 

321 previous call to `registerOpaqueTable`. 

322 where 

323 Additional keyword arguments are interpreted as equality 

324 constraints that restrict the deleted rows (combined with AND); 

325 keyword arguments are column names and values are the values they 

326 must have. 

327 """ 

328 self._managers.opaque[tableName].delete(where.keys(), where) 

329 

330 def registerCollection( 

331 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

332 ) -> bool: 

333 # Docstring inherited from lsst.daf.butler.registry.Registry 

334 _, registered = self._managers.collections.register(name, type, doc=doc) 

335 return registered 

336 

337 def getCollectionType(self, name: str) -> CollectionType: 

338 # Docstring inherited from lsst.daf.butler.registry.Registry 

339 return self._managers.collections.find(name).type 

340 

341 def _get_collection_record(self, name: str) -> CollectionRecord: 

342 # Docstring inherited from lsst.daf.butler.registry.Registry 

343 return self._managers.collections.find(name) 

344 

345 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

346 # Docstring inherited from lsst.daf.butler.registry.Registry 

347 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

348 return registered 

349 

350 @transactional 

351 def removeCollection(self, name: str) -> None: 

352 # Docstring inherited from lsst.daf.butler.registry.Registry 

353 self._managers.collections.remove(name) 

354 

355 def getCollectionChain(self, parent: str) -> CollectionSearch: 

356 # Docstring inherited from lsst.daf.butler.registry.Registry 

357 record = self._managers.collections.find(parent) 

358 if record.type is not CollectionType.CHAINED: 

359 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

360 assert isinstance(record, ChainedCollectionRecord) 

361 return record.children 

362 

363 @transactional 

364 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

365 # Docstring inherited from lsst.daf.butler.registry.Registry 

366 record = self._managers.collections.find(parent) 

367 if record.type is not CollectionType.CHAINED: 

368 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

369 assert isinstance(record, ChainedCollectionRecord) 

370 children = CollectionSearch.fromExpression(children) 

371 if children != record.children or flatten: 

372 record.update(self._managers.collections, children, flatten=flatten) 

373 

374 def getCollectionParentChains(self, collection: str) -> Set[str]: 

375 # Docstring inherited from lsst.daf.butler.registry.Registry 

376 return { 

377 record.name 

378 for record in self._managers.collections.getParentChains( 

379 self._managers.collections.find(collection).key 

380 ) 

381 } 

382 

383 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

384 # Docstring inherited from lsst.daf.butler.registry.Registry 

385 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

386 

387 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

388 # Docstring inherited from lsst.daf.butler.registry.Registry 

389 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

390 

391 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

392 # Docstring inherited from lsst.daf.butler.registry.Registry 

393 record = self._managers.collections.find(collection) 

394 return self._managers.datasets.getCollectionSummary(record) 

395 

396 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

397 # Docstring inherited from lsst.daf.butler.registry.Registry 

398 _, inserted = self._managers.datasets.register(datasetType) 

399 return inserted 

400 

401 def removeDatasetType(self, name: str) -> None: 

402 # Docstring inherited from lsst.daf.butler.registry.Registry 

403 self._managers.datasets.remove(name) 

404 

405 def getDatasetType(self, name: str) -> DatasetType: 

406 # Docstring inherited from lsst.daf.butler.registry.Registry 

407 return self._managers.datasets[name].datasetType 

408 

409 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

410 # Docstring inherited from lsst.daf.butler.registry.Registry 

411 return self._managers.datasets.supportsIdGenerationMode(mode) 

412 

413 def findDataset( 

414 self, 

415 datasetType: Union[DatasetType, str], 

416 dataId: Optional[DataId] = None, 

417 *, 

418 collections: Any = None, 

419 timespan: Optional[Timespan] = None, 

420 **kwargs: Any, 

421 ) -> Optional[DatasetRef]: 

422 # Docstring inherited from lsst.daf.butler.registry.Registry 

423 if isinstance(datasetType, DatasetType): 

424 storage = self._managers.datasets[datasetType.name] 

425 else: 

426 storage = self._managers.datasets[datasetType] 

427 dataId = DataCoordinate.standardize( 

428 dataId, 

429 graph=storage.datasetType.dimensions, 

430 universe=self.dimensions, 

431 defaults=self.defaults.dataId, 

432 **kwargs, 

433 ) 

434 if collections is None: 

435 if not self.defaults.collections: 

436 raise NoDefaultCollectionError( 

437 "No collections provided to findDataset, and no defaults from registry construction." 

438 ) 

439 collections = self.defaults.collections 

440 else: 

441 collections = CollectionSearch.fromExpression(collections) 

442 for collectionRecord in collections.iter(self._managers.collections): 

443 if collectionRecord.type is CollectionType.CALIBRATION and ( 

444 not storage.datasetType.isCalibration() or timespan is None 

445 ): 

446 continue 

447 result = storage.find(collectionRecord, dataId, timespan=timespan) 

448 if result is not None: 

449 return result 

450 

451 return None 

452 

453 @transactional 

454 def insertDatasets( 

455 self, 

456 datasetType: Union[DatasetType, str], 

457 dataIds: Iterable[DataId], 

458 run: Optional[str] = None, 

459 expand: bool = True, 

460 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

461 ) -> List[DatasetRef]: 

462 # Docstring inherited from lsst.daf.butler.registry.Registry 

463 if isinstance(datasetType, DatasetType): 

464 storage = self._managers.datasets.find(datasetType.name) 

465 if storage is None: 

466 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

467 else: 

468 storage = self._managers.datasets.find(datasetType) 

469 if storage is None: 

470 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

471 if run is None: 

472 if self.defaults.run is None: 

473 raise NoDefaultCollectionError( 

474 "No run provided to insertDatasets, and no default from registry construction." 

475 ) 

476 run = self.defaults.run 

477 runRecord = self._managers.collections.find(run) 

478 if runRecord.type is not CollectionType.RUN: 

479 raise CollectionTypeError( 

480 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

481 ) 

482 assert isinstance(runRecord, RunRecord) 

483 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

484 if expand: 

485 expandedDataIds = [ 

486 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

487 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

488 ] 

489 else: 

490 expandedDataIds = [ 

491 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

492 ] 

493 try: 

494 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

495 except sqlalchemy.exc.IntegrityError as err: 

496 raise ConflictingDefinitionError( 

497 f"A database constraint failure was triggered by inserting " 

498 f"one or more datasets of type {storage.datasetType} into " 

499 f"collection '{run}'. " 

500 f"This probably means a dataset with the same data ID " 

501 f"and dataset type already exists, but it may also mean a " 

502 f"dimension row is missing." 

503 ) from err 

504 return refs 

505 

506 @transactional 

507 def _importDatasets( 

508 self, 

509 datasets: Iterable[DatasetRef], 

510 expand: bool = True, 

511 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

512 reuseIds: bool = False, 

513 ) -> List[DatasetRef]: 

514 # Docstring inherited from lsst.daf.butler.registry.Registry 

515 datasets = list(datasets) 

516 if not datasets: 

517 # nothing to do 

518 return [] 

519 

520 # find dataset type 

521 datasetTypes = set(dataset.datasetType for dataset in datasets) 

522 if len(datasetTypes) != 1: 

523 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

524 datasetType = datasetTypes.pop() 

525 

526 # get storage handler for this dataset type 

527 storage = self._managers.datasets.find(datasetType.name) 

528 if storage is None: 

529 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

530 

531 # find run name 

532 runs = set(dataset.run for dataset in datasets) 

533 if len(runs) != 1: 

534 raise ValueError(f"Multiple run names in input datasets: {runs}") 

535 run = runs.pop() 

536 if run is None: 

537 if self.defaults.run is None: 

538 raise NoDefaultCollectionError( 

539 "No run provided to ingestDatasets, and no default from registry construction." 

540 ) 

541 run = self.defaults.run 

542 

543 runRecord = self._managers.collections.find(run) 

544 if runRecord.type is not CollectionType.RUN: 

545 raise CollectionTypeError( 

546 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

547 " RUN collection required." 

548 ) 

549 assert isinstance(runRecord, RunRecord) 

550 

551 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

552 if expand: 

553 expandedDatasets = [ 

554 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

555 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

556 ] 

557 else: 

558 expandedDatasets = [ 

559 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

560 for dataset in datasets 

561 ] 

562 

563 try: 

564 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

565 except sqlalchemy.exc.IntegrityError as err: 

566 raise ConflictingDefinitionError( 

567 f"A database constraint failure was triggered by inserting " 

568 f"one or more datasets of type {storage.datasetType} into " 

569 f"collection '{run}'. " 

570 f"This probably means a dataset with the same data ID " 

571 f"and dataset type already exists, but it may also mean a " 

572 f"dimension row is missing." 

573 ) from err 

574 return refs 

575 

576 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

577 # Docstring inherited from lsst.daf.butler.registry.Registry 

578 return self._managers.datasets.getDatasetRef(id) 

579 

580 @transactional 

581 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

582 # Docstring inherited from lsst.daf.butler.registry.Registry 

583 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

584 for datasetType, refsForType in progress.iter_item_chunks( 

585 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

586 ): 

587 storage = self._managers.datasets[datasetType.name] 

588 try: 

589 storage.delete(refsForType) 

590 except sqlalchemy.exc.IntegrityError as err: 

591 raise OrphanedRecordError( 

592 "One or more datasets is still present in one or more Datastores." 

593 ) from err 

594 

595 @transactional 

596 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

597 # Docstring inherited from lsst.daf.butler.registry.Registry 

598 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

599 collectionRecord = self._managers.collections.find(collection) 

600 if collectionRecord.type is not CollectionType.TAGGED: 

601 raise CollectionTypeError( 

602 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

603 ) 

604 for datasetType, refsForType in progress.iter_item_chunks( 

605 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

606 ): 

607 storage = self._managers.datasets[datasetType.name] 

608 try: 

609 storage.associate(collectionRecord, refsForType) 

610 except sqlalchemy.exc.IntegrityError as err: 

611 raise ConflictingDefinitionError( 

612 f"Constraint violation while associating dataset of type {datasetType.name} with " 

613 f"collection {collection}. This probably means that one or more datasets with the same " 

614 f"dataset type and data ID already exist in the collection, but it may also indicate " 

615 f"that the datasets do not exist." 

616 ) from err 

617 

618 @transactional 

619 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

620 # Docstring inherited from lsst.daf.butler.registry.Registry 

621 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

622 collectionRecord = self._managers.collections.find(collection) 

623 if collectionRecord.type is not CollectionType.TAGGED: 

624 raise CollectionTypeError( 

625 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

626 ) 

627 for datasetType, refsForType in progress.iter_item_chunks( 

628 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

629 ): 

630 storage = self._managers.datasets[datasetType.name] 

631 storage.disassociate(collectionRecord, refsForType) 

632 

633 @transactional 

634 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

635 # Docstring inherited from lsst.daf.butler.registry.Registry 

636 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

637 collectionRecord = self._managers.collections.find(collection) 

638 for datasetType, refsForType in progress.iter_item_chunks( 

639 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

640 ): 

641 storage = self._managers.datasets[datasetType.name] 

642 storage.certify(collectionRecord, refsForType, timespan) 

643 

644 @transactional 

645 def decertify( 

646 self, 

647 collection: str, 

648 datasetType: Union[str, DatasetType], 

649 timespan: Timespan, 

650 *, 

651 dataIds: Optional[Iterable[DataId]] = None, 

652 ) -> None: 

653 # Docstring inherited from lsst.daf.butler.registry.Registry 

654 collectionRecord = self._managers.collections.find(collection) 

655 if isinstance(datasetType, str): 

656 storage = self._managers.datasets[datasetType] 

657 else: 

658 storage = self._managers.datasets[datasetType.name] 

659 standardizedDataIds = None 

660 if dataIds is not None: 

661 standardizedDataIds = [ 

662 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

663 ] 

664 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

665 

666 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

667 """Return an object that allows a new `Datastore` instance to 

668 communicate with this `Registry`. 

669 

670 Returns 

671 ------- 

672 manager : `DatastoreRegistryBridgeManager` 

673 Object that mediates communication between this `Registry` and its 

674 associated datastores. 

675 """ 

676 return self._managers.datastores 

677 

678 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

679 # Docstring inherited from lsst.daf.butler.registry.Registry 

680 return self._managers.datastores.findDatastores(ref) 

681 

682 def expandDataId( 

683 self, 

684 dataId: Optional[DataId] = None, 

685 *, 

686 graph: Optional[DimensionGraph] = None, 

687 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

688 withDefaults: bool = True, 

689 **kwargs: Any, 

690 ) -> DataCoordinate: 

691 # Docstring inherited from lsst.daf.butler.registry.Registry 

692 if not withDefaults: 

693 defaults = None 

694 else: 

695 defaults = self.defaults.dataId 

696 try: 

697 standardized = DataCoordinate.standardize( 

698 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

699 ) 

700 except KeyError as exc: 

701 # This means either kwargs have some odd name or required 

702 # dimension is missing. 

703 raise DimensionNameError(str(exc)) from exc 

704 if standardized.hasRecords(): 

705 return standardized 

706 if records is None: 

707 records = {} 

708 elif isinstance(records, NamedKeyMapping): 

709 records = records.byName() 

710 else: 

711 records = dict(records) 

712 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

713 records.update(dataId.records.byName()) 

714 keys = standardized.byName() 

715 for element in standardized.graph.primaryKeyTraversalOrder: 

716 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

717 if record is ...: 

718 if isinstance(element, Dimension) and keys.get(element.name) is None: 

719 if element in standardized.graph.required: 

720 raise DimensionNameError( 

721 f"No value or null value for required dimension {element.name}." 

722 ) 

723 keys[element.name] = None 

724 record = None 

725 else: 

726 storage = self._managers.dimensions[element] 

727 dataIdSet = DataCoordinateIterable.fromScalar( 

728 DataCoordinate.standardize(keys, graph=element.graph) 

729 ) 

730 fetched = tuple(storage.fetch(dataIdSet)) 

731 try: 

732 (record,) = fetched 

733 except ValueError: 

734 record = None 

735 records[element.name] = record 

736 if record is not None: 

737 for d in element.implied: 

738 value = getattr(record, d.name) 

739 if keys.setdefault(d.name, value) != value: 

740 raise InconsistentDataIdError( 

741 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

742 f"but {element.name} implies {d.name}={value!r}." 

743 ) 

744 else: 

745 if element in standardized.graph.required: 

746 raise DataIdValueError( 

747 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

748 ) 

749 if element.alwaysJoin: 

750 raise InconsistentDataIdError( 

751 f"Could not fetch record for element {element.name} via keys {keys}, ", 

752 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

753 "related.", 

754 ) 

755 for d in element.implied: 

756 keys.setdefault(d.name, None) 

757 records.setdefault(d.name, None) 

758 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

759 

760 def insertDimensionData( 

761 self, 

762 element: Union[DimensionElement, str], 

763 *data: Union[Mapping[str, Any], DimensionRecord], 

764 conform: bool = True, 

765 replace: bool = False, 

766 ) -> None: 

767 # Docstring inherited from lsst.daf.butler.registry.Registry 

768 if conform: 

769 if isinstance(element, str): 

770 element = self.dimensions[element] 

771 records = [ 

772 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

773 ] 

774 else: 

775 # Ignore typing since caller said to trust them with conform=False. 

776 records = data # type: ignore 

777 storage = self._managers.dimensions[element] # type: ignore 

778 storage.insert(*records, replace=replace) 

779 

780 def syncDimensionData( 

781 self, 

782 element: Union[DimensionElement, str], 

783 row: Union[Mapping[str, Any], DimensionRecord], 

784 conform: bool = True, 

785 update: bool = False, 

786 ) -> Union[bool, Dict[str, Any]]: 

787 # Docstring inherited from lsst.daf.butler.registry.Registry 

788 if conform: 

789 if isinstance(element, str): 

790 element = self.dimensions[element] 

791 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

792 else: 

793 # Ignore typing since caller said to trust them with conform=False. 

794 record = row # type: ignore 

795 storage = self._managers.dimensions[element] # type: ignore 

796 return storage.sync(record, update=update) 

797 

798 def queryDatasetTypes( 

799 self, 

800 expression: Any = ..., 

801 *, 

802 components: Optional[bool] = None, 

803 missing: Optional[List[str]] = None, 

804 ) -> Iterator[DatasetType]: 

805 # Docstring inherited from lsst.daf.butler.registry.Registry 

806 try: 

807 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

808 except TypeError as exc: 

809 raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc 

810 unknownComponentsMessage = ( 

811 "Could not find definition for storage class %s for dataset type %r;" 

812 " if it has components they will not be included in dataset type query results." 

813 ) 

814 if wildcard is Ellipsis: 

815 for datasetType in self._managers.datasets: 

816 # The dataset type can no longer be a component 

817 yield datasetType 

818 if components: 

819 # Automatically create the component dataset types 

820 try: 

821 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

822 except KeyError as err: 

823 _LOG.warning(unknownComponentsMessage, err, datasetType.name) 

824 else: 

825 yield from componentsForDatasetType 

826 return 

827 done: Set[str] = set() 

828 for name in wildcard.strings: 

829 storage = self._managers.datasets.find(name) 

830 done.add(name) 

831 if storage is None: 

832 if missing is not None: 

833 missing.append(name) 

834 else: 

835 yield storage.datasetType 

836 if wildcard.patterns: 

837 # If components (the argument) is None, we'll save component 

838 # dataset that we might want to match, but only if their parents 

839 # didn't get included. 

840 componentsForLater = [] 

841 for registeredDatasetType in self._managers.datasets: 

842 # Components are not stored in registry so expand them here 

843 allDatasetTypes = [registeredDatasetType] 

844 if components is not False: 

845 # Only check for the components if we are being asked 

846 # for components or components is None. 

847 try: 

848 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

849 except KeyError as err: 

850 _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name) 

851 for datasetType in allDatasetTypes: 

852 if datasetType.name in done: 

853 continue 

854 parentName, componentName = datasetType.nameAndComponent() 

855 if componentName is not None and not components: 

856 if components is None and parentName not in done: 

857 componentsForLater.append(datasetType) 

858 continue 

859 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

860 done.add(datasetType.name) 

861 yield datasetType 

862 # Go back and try to match saved components. 

863 for datasetType in componentsForLater: 

864 parentName, _ = datasetType.nameAndComponent() 

865 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

866 yield datasetType 

867 

868 def queryCollections( 

869 self, 

870 expression: Any = ..., 

871 datasetType: Optional[DatasetType] = None, 

872 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

873 flattenChains: bool = False, 

874 includeChains: Optional[bool] = None, 

875 ) -> Iterator[str]: 

876 # Docstring inherited from lsst.daf.butler.registry.Registry 

877 

878 # Right now the datasetTypes argument is completely ignored, but that 

879 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

880 # ticket will take care of that. 

881 try: 

882 query = CollectionQuery.fromExpression(expression) 

883 except TypeError as exc: 

884 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

885 collectionTypes = ensure_iterable(collectionTypes) 

886 recordNames = [ 

887 record.name 

888 for record in query.iter( 

889 self._managers.collections, 

890 collectionTypes=frozenset(collectionTypes), 

891 flattenChains=flattenChains, 

892 includeChains=includeChains, 

893 ) 

894 ] 

895 for name in sorted(recordNames): 

896 yield name 

897 

898 def _makeQueryBuilder( 

899 self, summary: queries.QuerySummary, doomed_by: Iterable[str] = () 

900 ) -> queries.QueryBuilder: 

901 """Return a `QueryBuilder` instance capable of constructing and 

902 managing more complex queries than those obtainable via `Registry` 

903 interfaces. 

904 

905 This is an advanced interface; downstream code should prefer 

906 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

907 are sufficient. 

908 

909 Parameters 

910 ---------- 

911 summary : `queries.QuerySummary` 

912 Object describing and categorizing the full set of dimensions that 

913 will be included in the query. 

914 doomed_by : `Iterable` of `str`, optional 

915 A list of diagnostic messages that indicate why the query is going 

916 to yield no results and should not even be executed. If an empty 

917 container (default) the query will be executed unless other code 

918 determines that it is doomed. 

919 

920 Returns 

921 ------- 

922 builder : `queries.QueryBuilder` 

923 Object that can be used to construct and perform advanced queries. 

924 """ 

925 return queries.QueryBuilder( 

926 summary, 

927 queries.RegistryManagers( 

928 collections=self._managers.collections, 

929 dimensions=self._managers.dimensions, 

930 datasets=self._managers.datasets, 

931 TimespanReprClass=self._db.getTimespanRepresentation(), 

932 ), 

933 doomed_by=doomed_by, 

934 ) 

935 

936 def queryDatasets( 

937 self, 

938 datasetType: Any, 

939 *, 

940 collections: Any = None, 

941 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

942 dataId: Optional[DataId] = None, 

943 where: Optional[str] = None, 

944 findFirst: bool = False, 

945 components: Optional[bool] = None, 

946 bind: Optional[Mapping[str, Any]] = None, 

947 check: bool = True, 

948 **kwargs: Any, 

949 ) -> queries.DatasetQueryResults: 

950 # Docstring inherited from lsst.daf.butler.registry.Registry 

951 

952 # Standardize the collections expression. 

953 if collections is None: 

954 if not self.defaults.collections: 

955 raise NoDefaultCollectionError( 

956 "No collections provided to findDataset, and no defaults from registry construction." 

957 ) 

958 collections = self.defaults.collections 

959 elif findFirst: 

960 collections = CollectionSearch.fromExpression(collections) 

961 else: 

962 collections = CollectionQuery.fromExpression(collections) 

963 # Standardize and expand the data ID provided as a constraint. 

964 standardizedDataId = self.expandDataId(dataId, **kwargs) 

965 

966 # We can only query directly if given a non-component DatasetType 

967 # instance. If we were given an expression or str or a component 

968 # DatasetType instance, we'll populate this dict, recurse, and return. 

969 # If we already have a non-component DatasetType, it will remain None 

970 # and we'll run the query directly. 

971 composition: Optional[ 

972 Dict[ 

973 DatasetType, List[Optional[str]] # parent dataset type # component name, or None for parent 

974 ] 

975 ] = None 

976 if not isinstance(datasetType, DatasetType): 

977 # We were given a dataset type expression (which may be as simple 

978 # as a str). Loop over all matching datasets, delegating handling 

979 # of the `components` argument to queryDatasetTypes, as we populate 

980 # the composition dict. 

981 composition = defaultdict(list) 

982 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

983 parentName, componentName = trueDatasetType.nameAndComponent() 

984 if componentName is not None: 

985 parentDatasetType = self.getDatasetType(parentName) 

986 composition.setdefault(parentDatasetType, []).append(componentName) 

987 else: 

988 composition.setdefault(trueDatasetType, []).append(None) 

989 if not composition: 

990 return queries.ChainedDatasetQueryResults( 

991 [], 

992 doomed_by=[ 

993 f"No registered dataset type matching {t!r} found." 

994 for t in ensure_iterable(datasetType) 

995 ], 

996 ) 

997 elif datasetType.isComponent(): 

998 # We were given a true DatasetType instance, but it's a component. 

999 # the composition dict will have exactly one item. 

1000 parentName, componentName = datasetType.nameAndComponent() 

1001 parentDatasetType = self.getDatasetType(parentName) 

1002 composition = {parentDatasetType: [componentName]} 

1003 if composition is not None: 

1004 # We need to recurse. Do that once for each parent dataset type. 

1005 chain = [] 

1006 for parentDatasetType, componentNames in composition.items(): 

1007 parentResults = self.queryDatasets( 

1008 parentDatasetType, 

1009 collections=collections, 

1010 dimensions=dimensions, 

1011 dataId=standardizedDataId, 

1012 where=where, 

1013 bind=bind, 

1014 findFirst=findFirst, 

1015 check=check, 

1016 ) 

1017 assert isinstance( 

1018 parentResults, queries.ParentDatasetQueryResults 

1019 ), "Should always be true if passing in a DatasetType instance, and we are." 

1020 chain.append(parentResults.withComponents(componentNames)) 

1021 return queries.ChainedDatasetQueryResults(chain) 

1022 # If we get here, there's no need to recurse (or we are already 

1023 # recursing; there can only ever be one level of recursion). 

1024 

1025 # The full set of dimensions in the query is the combination of those 

1026 # needed for the DatasetType and those explicitly requested, if any. 

1027 requestedDimensionNames = set(datasetType.dimensions.names) 

1028 if dimensions is not None: 

1029 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1030 # Construct the summary structure needed to construct a QueryBuilder. 

1031 summary = queries.QuerySummary( 

1032 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1033 dataId=standardizedDataId, 

1034 expression=where, 

1035 bind=bind, 

1036 defaults=self.defaults.dataId, 

1037 check=check, 

1038 datasets=[datasetType], 

1039 ) 

1040 builder = self._makeQueryBuilder(summary) 

1041 # Add the dataset subquery to the query, telling the QueryBuilder to 

1042 # include the rank of the selected collection in the results only if we 

1043 # need to findFirst. Note that if any of the collections are 

1044 # actually wildcard expressions, and we've asked for deduplication, 

1045 # this will raise TypeError for us. 

1046 builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst) 

1047 query = builder.finish() 

1048 return queries.ParentDatasetQueryResults(self._db, query, components=[None], datasetType=datasetType) 

1049 

1050 def queryDataIds( 

1051 self, 

1052 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1053 *, 

1054 dataId: Optional[DataId] = None, 

1055 datasets: Any = None, 

1056 collections: Any = None, 

1057 where: Optional[str] = None, 

1058 components: Optional[bool] = None, 

1059 bind: Optional[Mapping[str, Any]] = None, 

1060 check: bool = True, 

1061 **kwargs: Any, 

1062 ) -> queries.DataCoordinateQueryResults: 

1063 # Docstring inherited from lsst.daf.butler.registry.Registry 

1064 dimensions = ensure_iterable(dimensions) 

1065 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1066 standardizedDatasetTypes = set() 

1067 requestedDimensions = self.dimensions.extract(dimensions) 

1068 missing: List[str] = [] 

1069 if datasets is not None: 

1070 if not collections: 

1071 if not self.defaults.collections: 

1072 raise NoDefaultCollectionError( 

1073 f"Cannot pass 'datasets' (='{datasets}') without 'collections'." 

1074 ) 

1075 collections = self.defaults.collections 

1076 else: 

1077 # Preprocess collections expression in case the original 

1078 # included single-pass iterators (we'll want to use it multiple 

1079 # times below). 

1080 collections = CollectionQuery.fromExpression(collections) 

1081 for datasetType in self.queryDatasetTypes(datasets, components=components, missing=missing): 

1082 # If any matched dataset type is a component, just operate on 

1083 # its parent instead, because Registry doesn't know anything 

1084 # about what components exist, and here (unlike queryDatasets) 

1085 # we don't care about returning them. 

1086 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1087 if componentName is not None: 

1088 datasetType = self.getDatasetType(parentDatasetTypeName) 

1089 standardizedDatasetTypes.add(datasetType) 

1090 elif collections: 

1091 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1092 

1093 def query_factory( 

1094 order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None 

1095 ) -> Query: 

1096 """Construct the Query object that generates query results.""" 

1097 summary = queries.QuerySummary( 

1098 requested=requestedDimensions, 

1099 dataId=standardizedDataId, 

1100 expression=where, 

1101 bind=bind, 

1102 defaults=self.defaults.dataId, 

1103 check=check, 

1104 datasets=standardizedDatasetTypes, 

1105 order_by=order_by, 

1106 limit=limit, 

1107 ) 

1108 builder = self._makeQueryBuilder( 

1109 summary, doomed_by=[f"Dataset type {name} is not registered." for name in missing] 

1110 ) 

1111 for datasetType in standardizedDatasetTypes: 

1112 builder.joinDataset( 

1113 datasetType, 

1114 collections, 

1115 isResult=False, 

1116 ) 

1117 return builder.finish() 

1118 

1119 return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions) 

1120 

1121 def queryDimensionRecords( 

1122 self, 

1123 element: Union[DimensionElement, str], 

1124 *, 

1125 dataId: Optional[DataId] = None, 

1126 datasets: Any = None, 

1127 collections: Any = None, 

1128 where: Optional[str] = None, 

1129 components: Optional[bool] = None, 

1130 bind: Optional[Mapping[str, Any]] = None, 

1131 check: bool = True, 

1132 **kwargs: Any, 

1133 ) -> queries.DimensionRecordQueryResults: 

1134 # Docstring inherited from lsst.daf.butler.registry.Registry 

1135 if not isinstance(element, DimensionElement): 

1136 try: 

1137 element = self.dimensions[element] 

1138 except KeyError as e: 

1139 raise DimensionNameError( 

1140 f"No such dimension '{element}', available dimensions: " 

1141 + str(self.dimensions.getStaticElements()) 

1142 ) from e 

1143 dataIds = self.queryDataIds( 

1144 element.graph, 

1145 dataId=dataId, 

1146 datasets=datasets, 

1147 collections=collections, 

1148 where=where, 

1149 components=components, 

1150 bind=bind, 

1151 check=check, 

1152 **kwargs, 

1153 ) 

1154 return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element]) 

1155 

1156 def queryDatasetAssociations( 

1157 self, 

1158 datasetType: Union[str, DatasetType], 

1159 collections: Any = ..., 

1160 *, 

1161 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1162 flattenChains: bool = False, 

1163 ) -> Iterator[DatasetAssociation]: 

1164 # Docstring inherited from lsst.daf.butler.registry.Registry 

1165 if collections is None: 

1166 if not self.defaults.collections: 

1167 raise NoDefaultCollectionError( 

1168 "No collections provided to findDataset, and no defaults from registry construction." 

1169 ) 

1170 collections = self.defaults.collections 

1171 else: 

1172 collections = CollectionQuery.fromExpression(collections) 

1173 TimespanReprClass = self._db.getTimespanRepresentation() 

1174 if isinstance(datasetType, str): 

1175 storage = self._managers.datasets[datasetType] 

1176 else: 

1177 storage = self._managers.datasets[datasetType.name] 

1178 for collectionRecord in collections.iter( 

1179 self._managers.collections, 

1180 collectionTypes=frozenset(collectionTypes), 

1181 flattenChains=flattenChains, 

1182 ): 

1183 query = storage.select(collectionRecord) 

1184 for row in self._db.query(query.combine()).mappings(): 

1185 dataId = DataCoordinate.fromRequiredValues( 

1186 storage.datasetType.dimensions, 

1187 tuple(row[name] for name in storage.datasetType.dimensions.required.names), 

1188 ) 

1189 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1190 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False) 

1191 if collectionRecord.type is CollectionType.CALIBRATION: 

1192 timespan = TimespanReprClass.extract(row) 

1193 else: 

1194 timespan = None 

1195 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1196 

1197 storageClasses: StorageClassFactory 

1198 """All storage classes known to the registry (`StorageClassFactory`). 

1199 """