Coverage for python/lsst/daf/butler/registries/sql.py: 13% (481 statements)

coverage.py v6.4.1, created at 2022-06-23 02:27 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Set, Tuple, Union

import sqlalchemy
from lsst.resources import ResourcePathExpression
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataCoordinateIterable,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSearch,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DatasetTypeExpressionError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
    RegistryDefaults,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, DatasetIdGenEnum, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.queries import Query
from ..registry.summaries import CollectionSummary
from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager


_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)
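
    # Usage sketch (illustrative only; the config values and repo root below
    # are hypothetical): creating a brand-new registry database and getting a
    # `SqlRegistry` for it.
    #
    #     from lsst.daf.butler.registry import RegistryConfig
    #     from lsst.daf.butler.registries.sql import SqlRegistry
    #
    #     config = RegistryConfig()  # default configuration
    #     config["db"] = "sqlite:///example_repo/gen3.sqlite3"
    #     registry = SqlRegistry.createFromConfig(config, butlerRoot="example_repo")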

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create `Registry` subclass instance from `config`.

        Registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            str(config.connectionString),
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)
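
    # Usage sketch (illustrative): opening an existing repository read-only
    # with a default collection search path. The path and collection name are
    # hypothetical.
    #
    #     registry = SqlRegistry.fromConfig(
    #         "example_repo/butler.yaml",
    #         writeable=False,
    #         defaults=RegistryDefaults(collections=["HSC/defaults"]),
    #     )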

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise

    def resetConnectionPool(self) -> None:
        """Reset SQLAlchemy connection pool for `SqlRegistry` database.

        This operation is useful when using registry with fork-based
        multiprocessing. To use registry across a fork boundary one has to
        make sure that there are no currently active connections (no session
        or transaction is in progress) and that the connection pool is reset
        using this method. This method should be called by the child process
        immediately after the fork.
        """
        self._db._engine.dispose()
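
    # Usage sketch (illustrative): with fork-based multiprocessing each child
    # process should reset the inherited connection pool before using the
    # registry.
    #
    #     import os
    #
    #     pid = os.fork()
    #     if pid == 0:
    #         registry.resetConnectionPool()  # child: discard inherited connections
    #         # ... safe to use `registry` in the child from here on ...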

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved
        via `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that
            represents a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)
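
    # Usage sketch (illustrative): a round trip through an opaque table. The
    # table name and column layout are hypothetical.
    #
    #     spec = ddl.TableSpec(
    #         fields=[
    #             ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
    #             ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    #         ]
    #     )
    #     registry.registerOpaqueTable("example_datastore_records", spec)
    #     registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
    #     rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
    #     registry.deleteOpaqueData("example_datastore_records", dataset_id=1)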

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> CollectionSearch:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionSearch.fromExpression(children)
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)
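
    # Usage sketch (illustrative): building a CHAINED collection from two
    # hypothetical RUN collections; the first child in the chain is searched
    # first.
    #
    #     registry.registerRun("HSC/runs/a")
    #     registry.registerRun("HSC/runs/b")
    #     registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain("HSC/defaults", ["HSC/runs/b", "HSC/runs/a"])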

    def getCollectionParentChains(self, collection: str) -> Set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.datasets.remove(name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets[name].datasetType

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets[datasetType.name]
        else:
            storage = self._managers.datasets[datasetType]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=storage.datasetType.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        else:
            collections = CollectionSearch.fromExpression(collections)
        for collectionRecord in collections.iter(self._managers.collections):
            if collectionRecord.type is CollectionType.CALIBRATION and (
                not storage.datasetType.isCalibration() or timespan is None
            ):
                continue
            result = storage.find(collectionRecord, dataId, timespan=timespan)
            if result is not None:
                return result

        return None
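
    # Usage sketch (illustrative): looking up a single dataset in the default
    # collection search path; the dataset type and data ID values are
    # hypothetical.
    #
    #     ref = registry.findDataset("calexp", instrument="HSC", visit=903334, detector=20)
    #     if ref is not None:
    #         print(ref.id, ref.run)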

    @transactional
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                f"A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                f"This probably means a dataset with the same data ID "
                f"and dataset type already exists, but it may also mean a "
                f"dimension row is missing."
            ) from err
        return refs
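
    # Usage sketch (illustrative): registering a dataset type and inserting
    # one dataset into a RUN collection; all names and data ID values are
    # hypothetical.
    #
    #     datasetType = DatasetType(
    #         "rawExample",
    #         dimensions=["instrument", "detector", "exposure"],
    #         storageClass="Exposure",
    #         universe=registry.dimensions,
    #     )
    #     registry.registerDatasetType(datasetType)
    #     registry.registerRun("HSC/runs/ingest")
    #     (ref,) = registry.insertDatasets(
    #         datasetType,
    #         [{"instrument": "HSC", "detector": 20, "exposure": 903334}],
    #         run="HSC/runs/ingest",
    #     )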

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = set(dataset.datasetType for dataset in datasets)
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = set(dataset.run for dataset in datasets)
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to _importDatasets, and no default from registry construction."
                )
            run = self.defaults.run

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                f"A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                f"This probably means a dataset with the same data ID "
                f"and dataset type already exists, but it may also mean a "
                f"dimension row is missing."
            ) from err
        return refs

    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    f"dataset type and data ID already exist in the collection, but it may also indicate "
                    f"that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(collectionRecord, refsForType, timespan)

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds)

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or a required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    dataIdSet = DataCoordinateIterable.fromScalar(
                        DataCoordinate.standardize(keys, graph=element.graph)
                    )
                    fetched = tuple(storage.fetch(dataIdSet))
                    try:
                        (record,) = fetched
                    except ValueError:
                        record = None
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
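
    # Usage sketch (illustrative): expanding a minimal data ID so that
    # dimension records become available; the data ID values are hypothetical.
    #
    #     dataId = registry.expandDataId(instrument="HSC", exposure=903334)
    #     timespan = dataId.records["exposure"].timespan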

    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]  # type: ignore
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]  # type: ignore
        return storage.sync(record, update=update)
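
    # Usage sketch (illustrative): inserting and syncing a dimension record;
    # the instrument values are hypothetical.
    #
    #     record = {"name": "HSC", "visit_max": 21474800, "exposure_max": 21474800, "detector_max": 200}
    #     registry.insertDimensionData("instrument", record, skip_existing=True)
    #     registry.syncDimensionData("instrument", record, update=True)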

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterator[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
        except TypeError as exc:
            raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc
        unknownComponentsMessage = (
            "Could not find definition for storage class %s for dataset type %r;"
            " if it has components they will not be included in dataset type query results."
        )
        if wildcard is Ellipsis:
            for datasetType in self._managers.datasets:
                # The dataset type can no longer be a component.
                yield datasetType
                if components:
                    # Automatically create the component dataset types.
                    try:
                        componentsForDatasetType = datasetType.makeAllComponentDatasetTypes()
                    except KeyError as err:
                        _LOG.warning(unknownComponentsMessage, err, datasetType.name)
                    else:
                        yield from componentsForDatasetType
            return
        done: Set[str] = set()
        for name in wildcard.strings:
            storage = self._managers.datasets.find(name)
            done.add(name)
            if storage is None:
                if missing is not None:
                    missing.append(name)
            else:
                yield storage.datasetType
        if wildcard.patterns:
            # If components (the argument) is None, we'll save component
            # dataset types that we might want to match, but only if their
            # parents didn't get included.
            componentsForLater = []
            for registeredDatasetType in self._managers.datasets:
                # Components are not stored in registry, so expand them here.
                allDatasetTypes = [registeredDatasetType]
                if components is not False:
                    # Only check for the components if we are being asked
                    # for components or components is None.
                    try:
                        allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes())
                    except KeyError as err:
                        _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name)
                for datasetType in allDatasetTypes:
                    if datasetType.name in done:
                        continue
                    parentName, componentName = datasetType.nameAndComponent()
                    if componentName is not None and not components:
                        if components is None and parentName not in done:
                            componentsForLater.append(datasetType)
                        continue
                    if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                        done.add(datasetType.name)
                        yield datasetType
            # Go back and try to match saved components.
            for datasetType in componentsForLater:
                parentName, _ = datasetType.nameAndComponent()
                if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                    yield datasetType

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Iterator[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            query = CollectionQuery.fromExpression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        for record in query.iter(
            self._managers.collections,
            collectionTypes=frozenset(collectionTypes),
            flattenChains=flattenChains,
            includeChains=includeChains,
        ):
            yield record.name

    def _makeQueryBuilder(
        self, summary: queries.QuerySummary, doomed_by: Iterable[str] = ()
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        return queries.QueryBuilder(
            summary,
            queries.RegistryManagers(
                collections=self._managers.collections,
                dimensions=self._managers.dimensions,
                datasets=self._managers.datasets,
                TimespanReprClass=self._db.getTimespanRepresentation(),
            ),
            doomed_by=doomed_by,
        )

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Standardize the collections expression.
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasets, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        elif findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        # Standardize and expand the data ID provided as a constraint.
        standardizedDataId = self.expandDataId(dataId, **kwargs)

        # We can only query directly if given a non-component DatasetType
        # instance. If we were given an expression or str or a component
        # DatasetType instance, we'll populate this dict, recurse, and return.
        # If we already have a non-component DatasetType, it will remain None
        # and we'll run the query directly.
        composition: Optional[
            Dict[
                DatasetType,  # parent dataset type
                List[Optional[str]],  # component name, or None for parent
            ]
        ] = None
        if not isinstance(datasetType, DatasetType):
            # We were given a dataset type expression (which may be as simple
            # as a str). Loop over all matching datasets, delegating handling
            # of the `components` argument to queryDatasetTypes, as we populate
            # the composition dict.
            composition = defaultdict(list)
            for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
                parentName, componentName = trueDatasetType.nameAndComponent()
                if componentName is not None:
                    parentDatasetType = self.getDatasetType(parentName)
                    composition.setdefault(parentDatasetType, []).append(componentName)
                else:
                    composition.setdefault(trueDatasetType, []).append(None)
            if not composition:
                return queries.ChainedDatasetQueryResults(
                    [],
                    doomed_by=[
                        f"No registered dataset type matching {t!r} found, so no matching datasets can "
                        "exist in any collection."
                        for t in ensure_iterable(datasetType)
                    ],
                )
        elif datasetType.isComponent():
            # We were given a true DatasetType instance, but it's a component.
            # The composition dict will have exactly one item.
            parentName, componentName = datasetType.nameAndComponent()
            parentDatasetType = self.getDatasetType(parentName)
            composition = {parentDatasetType: [componentName]}
        if composition is not None:
            # We need to recurse. Do that once for each parent dataset type.
            chain = []
            for parentDatasetType, componentNames in composition.items():
                parentResults = self.queryDatasets(
                    parentDatasetType,
                    collections=collections,
                    dimensions=dimensions,
                    dataId=standardizedDataId,
                    where=where,
                    bind=bind,
                    findFirst=findFirst,
                    check=check,
                )
                assert isinstance(
                    parentResults, queries.ParentDatasetQueryResults
                ), "Should always be true if passing in a DatasetType instance, and we are."
                chain.append(parentResults.withComponents(componentNames))
            return queries.ChainedDatasetQueryResults(chain)
        # If we get here, there's no need to recurse (or we are already
        # recursing; there can only ever be one level of recursion).

        # The full set of dimensions in the query is the combination of those
        # needed for the DatasetType and those explicitly requested, if any.
        requestedDimensionNames = set(datasetType.dimensions.names)
        if dimensions is not None:
            requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
        # Construct the summary structure needed to construct a QueryBuilder.
        summary = queries.QuerySummary(
            requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
            dataId=standardizedDataId,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=[datasetType],
        )
        builder = self._makeQueryBuilder(summary)
        # Add the dataset subquery to the query, telling the QueryBuilder to
        # include the rank of the selected collection in the results only if
        # we need to findFirst. Note that if any of the collections are
        # actually wildcard expressions, and we've asked for deduplication,
        # this will raise TypeError for us.
        builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst)
        query = builder.finish()
        return queries.ParentDatasetQueryResults(self._db, query, components=[None], datasetType=datasetType)
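
    # Usage sketch (illustrative): a find-first query for "calexp" datasets in
    # a chain of collections, restricted by a user expression with a bind
    # value. Collection and dataset type names are hypothetical.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["HSC/runs/b", "HSC/runs/a"],
    #         where="band = my_band",
    #         bind={"my_band": "r"},
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId, ref.run)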

    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        standardizedDataId = self.expandDataId(dataId, **kwargs)
        standardizedDatasetTypes = set()
        requestedDimensions = self.dimensions.extract(dimensions)
        missing: List[str] = []
        if datasets is not None:
            if not collections:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError(
                        f"Cannot pass 'datasets' (='{datasets}') without 'collections'."
                    )
                collections = self.defaults.collections
            else:
                # Preprocess collections expression in case the original
                # included single-pass iterators (we'll want to use it multiple
                # times below).
                collections = CollectionQuery.fromExpression(collections)
            for datasetType in self.queryDatasetTypes(datasets, components=components, missing=missing):
                # If any matched dataset type is a component, just operate on
                # its parent instead, because Registry doesn't know anything
                # about what components exist, and here (unlike queryDatasets)
                # we don't care about returning them.
                parentDatasetTypeName, componentName = datasetType.nameAndComponent()
                if componentName is not None:
                    datasetType = self.getDatasetType(parentDatasetTypeName)
                standardizedDatasetTypes.add(datasetType)
        elif collections:
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")

        def query_factory(
            order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None
        ) -> Query:
            """Construct the Query object that generates query results."""
            summary = queries.QuerySummary(
                requested=requestedDimensions,
                dataId=standardizedDataId,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=standardizedDatasetTypes,
                order_by=order_by,
                limit=limit,
            )
            builder = self._makeQueryBuilder(
                summary, doomed_by=[f"Dataset type {name} is not registered." for name in missing]
            )
            for datasetType in standardizedDatasetTypes:
                builder.joinDataset(
                    datasetType,
                    collections,
                    isResult=False,
                )
            return builder.finish()

        return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions)
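
    # Usage sketch (illustrative): all visit/detector combinations for which a
    # hypothetical "raw" dataset exists in the given collection.
    #
    #     dataIds = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="HSC/raw/all",
    #         instrument="HSC",
    #     )
    #     for dataId in dataIds:
    #         print(dataId["visit"], dataId["detector"])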

    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        dataIds = self.queryDataIds(
            element.graph,
            dataId=dataId,
            datasets=datasets,
            collections=collections,
            where=where,
            components=components,
            bind=bind,
            check=check,
            **kwargs,
        )
        return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element])

    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, and no defaults from registry "
                    "construction."
                )
            collections = self.defaults.collections
        else:
            collections = CollectionQuery.fromExpression(collections)
        TimespanReprClass = self._db.getTimespanRepresentation()
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        for collectionRecord in collections.iter(
            self._managers.collections,
            collectionTypes=frozenset(collectionTypes),
            flattenChains=flattenChains,
        ):
            query = storage.select(collectionRecord)
            for row in self._db.query(query).mappings():
                dataId = DataCoordinate.fromRequiredValues(
                    storage.datasetType.dimensions,
                    tuple(row[name] for name in storage.datasetType.dimensions.required.names),
                )
                runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]]
                ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False)
                if collectionRecord.type is CollectionType.CALIBRATION:
                    timespan = TimespanReprClass.extract(row)
                else:
                    timespan = None
                yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan)

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """