Coverage for python/lsst/daf/butler/registries/sql.py: 13%

485 statements  

coverage.py v7.5.0, created at 2024-04-24 23:50 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28from collections import defaultdict 

29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Set, Tuple, Union 

30 

31import sqlalchemy 

32from lsst.resources import ResourcePathExpression 

33from lsst.utils.iteration import ensure_iterable 

34 

35from ..core import ( 

36 Config, 

37 DataCoordinate, 

38 DataCoordinateIterable, 

39 DataId, 

40 DatasetAssociation, 

41 DatasetId, 

42 DatasetRef, 

43 DatasetType, 

44 Dimension, 

45 DimensionConfig, 

46 DimensionElement, 

47 DimensionGraph, 

48 DimensionRecord, 

49 DimensionUniverse, 

50 NamedKeyMapping, 

51 NameLookupMapping, 

52 Progress, 

53 StorageClassFactory, 

54 Timespan, 

55 ddl, 

56) 

57from ..core.utils import transactional 

58from ..registry import ( 

59 ArgumentError, 

60 CollectionExpressionError, 

61 CollectionSearch, 

62 CollectionType, 

63 CollectionTypeError, 

64 ConflictingDefinitionError, 

65 DataIdValueError, 

66 DatasetTypeError, 

67 DatasetTypeExpressionError, 

68 DimensionNameError, 

69 InconsistentDataIdError, 

70 NoDefaultCollectionError, 

71 OrphanedRecordError, 

72 Registry, 

73 RegistryConfig, 

74 RegistryDefaults, 

75 queries, 

76) 

77from ..registry.interfaces import ChainedCollectionRecord, DatasetIdGenEnum, RunRecord 

78from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

79from ..registry.queries import Query 

80from ..registry.summaries import CollectionSummary 

81from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis 

82 

83if TYPE_CHECKING:  # coverage annotation: 83 ↛ 84 (line 83 didn't jump to line 84 because the condition on line 83 was never true)

84 from .._butlerConfig import ButlerConfig 

85 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

86 

87 

88_LOG = logging.getLogger(__name__) 

89 

90 

91class SqlRegistry(Registry): 

92 """Registry implementation based on SQLAlchemy. 

93 

94 Parameters 

95 ---------- 

96 database : `Database` 

97 Database instance to store Registry. 

98 defaults : `RegistryDefaults` 

99 Default collection search path and/or output `~CollectionType.RUN` 

100 collection. 

101 managers : `RegistryManagerInstances` 

102 All the managers required for this registry. 

103 """ 

104 

105 defaultConfigFile: Optional[str] = None 

106 """Path to configuration defaults. Accessed within the ``configs`` resource 

107 or relative to a search path. Can be None if no defaults are specified. 

108 """ 

109 

110 @classmethod 

111 def createFromConfig( 

112 cls, 

113 config: Optional[Union[RegistryConfig, str]] = None, 

114 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

115 butlerRoot: Optional[ResourcePathExpression] = None, 

116 ) -> Registry: 

117 """Create registry database and return `SqlRegistry` instance. 

118 

119 This method initializes database contents; the database must be empty 

120 prior to calling this method. 

121 

122 Parameters 

123 ---------- 

124 config : `RegistryConfig` or `str`, optional 

125 Registry configuration. If missing, the default configuration will 

126 be loaded from ``registry.yaml``. 

127 dimensionConfig : `DimensionConfig` or `str`, optional 

128 Dimensions configuration. If missing, the default configuration 

129 will be loaded from ``dimensions.yaml``. 

130 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

131 Path to the repository root this `SqlRegistry` will manage. 

132 

133 Returns 

134 ------- 

135 registry : `SqlRegistry` 

136 A new `SqlRegistry` instance. 

137 """ 

138 config = cls.forceRegistryConfig(config) 

139 config.replaceRoot(butlerRoot) 

140 

141 if isinstance(dimensionConfig, str): 

142 dimensionConfig = DimensionConfig(dimensionConfig) 

143 elif dimensionConfig is None: 

144 dimensionConfig = DimensionConfig() 

145 elif not isinstance(dimensionConfig, DimensionConfig): 

146 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

147 

148 DatabaseClass = config.getDatabaseClass() 

149 database = DatabaseClass.fromUri( 

150 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

151 ) 

152 managerTypes = RegistryManagerTypes.fromConfig(config) 

153 managers = managerTypes.makeRepo(database, dimensionConfig) 

154 return cls(database, RegistryDefaults(), managers) 

155 
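# Editor's illustrative sketch (not part of sql.py): a minimal example of
# creating a brand-new, empty registry database with createFromConfig.
# The config file name and repository root below are hypothetical placeholders.
from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.createFromConfig(
    config="registry.yaml",          # RegistryConfig, path, or None for the default
    dimensionConfig=None,            # None loads the default dimensions.yaml
    butlerRoot="/tmp/example_repo",  # hypothetical repository root
)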

156 @classmethod 

157 def fromConfig( 

158 cls, 

159 config: Union[ButlerConfig, RegistryConfig, Config, str], 

160 butlerRoot: Optional[ResourcePathExpression] = None, 

161 writeable: bool = True, 

162 defaults: Optional[RegistryDefaults] = None, 

163 ) -> Registry: 

164 """Create `Registry` subclass instance from `config`. 

165 

166 Registry database must be initialized prior to calling this method. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

171 Registry configuration. 

172 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

173 Path to the repository root this `Registry` will manage. 

174 writeable : `bool`, optional 

175 If `True` (default) create a read-write connection to the database. 

176 defaults : `RegistryDefaults`, optional 

177 Default collection search path and/or output `~CollectionType.RUN` 

178 collection. 

179 

180 Returns 

181 ------- 

182 registry : `SqlRegistry` (subclass) 

183 A new `SqlRegistry` subclass instance. 

184 """ 

185 config = cls.forceRegistryConfig(config) 

186 config.replaceRoot(butlerRoot) 

187 DatabaseClass = config.getDatabaseClass() 

188 database = DatabaseClass.fromUri( 

189 str(config.connectionString), 

190 origin=config.get("origin", 0), 

191 namespace=config.get("namespace"), 

192 writeable=writeable, 

193 ) 

194 managerTypes = RegistryManagerTypes.fromConfig(config) 

195 with database.session(): 

196 managers = managerTypes.loadRepo(database) 

197 if defaults is None: 

198 defaults = RegistryDefaults() 

199 return cls(database, defaults, managers) 

200 
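# Editor's illustrative sketch (not part of sql.py): connecting read-only to an
# existing registry with a default collection search path.  The config path and
# collection name are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryDefaults

registry = SqlRegistry.fromConfig(
    "registry.yaml",
    writeable=False,
    defaults=RegistryDefaults(collections=["HSC/defaults"]),
)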

201 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

202 self._db = database 

203 self._managers = managers 

204 self.storageClasses = StorageClassFactory() 

205 # Intentionally invoke property setter to initialize defaults. This 

206 # can only be done after most of the rest of Registry has already been 

207 # initialized, and must be done before the property getter is used. 

208 self.defaults = defaults 

209 

210 def __str__(self) -> str: 

211 return str(self._db) 

212 

213 def __repr__(self) -> str: 

214 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

215 

216 def isWriteable(self) -> bool: 

217 # Docstring inherited from lsst.daf.butler.registry.Registry 

218 return self._db.isWriteable() 

219 

220 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

221 # Docstring inherited from lsst.daf.butler.registry.Registry 

222 if defaults is None: 

223 # No need to copy, because `RegistryDefaults` is immutable; we 

224 # effectively copy on write. 

225 defaults = self.defaults 

226 return type(self)(self._db, defaults, self._managers) 

227 

228 @property 

229 def dimensions(self) -> DimensionUniverse: 

230 # Docstring inherited from lsst.daf.butler.registry.Registry 

231 return self._managers.dimensions.universe 

232 

233 def refresh(self) -> None: 

234 # Docstring inherited from lsst.daf.butler.registry.Registry 

235 with self._db.transaction(): 

236 self._managers.refresh() 

237 

238 @contextlib.contextmanager 

239 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

240 # Docstring inherited from lsst.daf.butler.registry.Registry 

241 try: 

242 with self._db.transaction(savepoint=savepoint): 

243 yield 

244 except BaseException: 

245 # TODO: this clears the caches sometimes when we wouldn't actually 

246 # need to. Can we avoid that? 

247 self._managers.dimensions.clearCaches() 

248 raise 

249 
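# Editor's illustrative sketch (not part of sql.py): grouping several registry
# operations in one transaction so they commit or roll back together.  Assumes
# ``registry`` is a SqlRegistry instance; the collection names are hypothetical.
from lsst.daf.butler.registry import CollectionType

with registry.transaction(savepoint=True):
    registry.registerRun("u/someone/run1")
    registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
# An exception raised inside the block rolls back both registrations.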

250 def resetConnectionPool(self) -> None: 

251 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

252 

253 This operation is useful when using the registry with fork-based 

254 multiprocessing. To use the registry across a fork boundary, one has to 

255 make sure that there are no currently active connections (no session or 

256 transaction in progress) and that the connection pool is reset using 

257 this method. This method should be called by the child process 

258 immediately after the fork. 

259 """ 

260 self._db._engine.dispose() 

261 
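# Editor's illustrative sketch (not part of sql.py): resetting the connection
# pool in forked worker processes before they touch the registry.  Assumes
# ``registry`` is a module-level SqlRegistry instance.
import multiprocessing


def _init_worker() -> None:
    # Runs once in each forked child; inherited connections must not be reused.
    registry.resetConnectionPool()


with multiprocessing.get_context("fork").Pool(processes=2, initializer=_init_worker) as pool:
    pass  # submit registry-using work to the pool here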

262 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

263 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

264 other data repository client. 

265 

266 Opaque table records can be added via `insertOpaqueData`, retrieved via 

267 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

268 

269 Parameters 

270 ---------- 

271 tableName : `str` 

272 Logical name of the opaque table. This may differ from the 

273 actual name used in the database by a prefix and/or suffix. 

274 spec : `ddl.TableSpec` 

275 Specification for the table to be added. 

276 """ 

277 self._managers.opaque.register(tableName, spec) 

278 

279 @transactional 

280 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

281 """Insert records into an opaque table. 

282 

283 Parameters 

284 ---------- 

285 tableName : `str` 

286 Logical name of the opaque table. Must match the name used in a 

287 previous call to `registerOpaqueTable`. 

288 data 

289 Each additional positional argument is a dictionary that represents 

290 a single row to be added. 

291 """ 

292 self._managers.opaque[tableName].insert(*data) 

293 

294 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

295 """Retrieve records from an opaque table. 

296 

297 Parameters 

298 ---------- 

299 tableName : `str` 

300 Logical name of the opaque table. Must match the name used in a 

301 previous call to `registerOpaqueTable`. 

302 where 

303 Additional keyword arguments are interpreted as equality 

304 constraints that restrict the returned rows (combined with AND); 

305 keyword arguments are column names and values are the values they 

306 must have. 

307 

308 Yields 

309 ------ 

310 row : `dict` 

311 A dictionary representing a single result row. 

312 """ 

313 yield from self._managers.opaque[tableName].fetch(**where) 

314 

315 @transactional 

316 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

317 """Remove records from an opaque table. 

318 

319 Parameters 

320 ---------- 

321 tableName : `str` 

322 Logical name of the opaque table. Must match the name used in a 

323 previous call to `registerOpaqueTable`. 

324 where 

325 Additional keyword arguments are interpreted as equality 

326 constraints that restrict the deleted rows (combined with AND); 

327 keyword arguments are column names and values are the values they 

328 must have. 

329 """ 

330 self._managers.opaque[tableName].delete(where.keys(), where) 

331 
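# Editor's illustrative sketch (not part of sql.py): a round trip through the
# opaque-table API.  Assumes ``registry`` is a SqlRegistry instance; the table
# name, column names, and FieldSpec arguments are hypothetical.
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
registry.deleteOpaqueData("example_datastore_records", dataset_id=1)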

332 def registerCollection( 

333 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

334 ) -> bool: 

335 # Docstring inherited from lsst.daf.butler.registry.Registry 

336 _, registered = self._managers.collections.register(name, type, doc=doc) 

337 return registered 

338 

339 def getCollectionType(self, name: str) -> CollectionType: 

340 # Docstring inherited from lsst.daf.butler.registry.Registry 

341 return self._managers.collections.find(name).type 

342 

343 def _get_collection_record(self, name: str) -> CollectionRecord: 

344 # Docstring inherited from lsst.daf.butler.registry.Registry 

345 return self._managers.collections.find(name) 

346 

347 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

348 # Docstring inherited from lsst.daf.butler.registry.Registry 

349 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

350 return registered 

351 

352 @transactional 

353 def removeCollection(self, name: str) -> None: 

354 # Docstring inherited from lsst.daf.butler.registry.Registry 

355 self._managers.collections.remove(name) 

356 

357 def getCollectionChain(self, parent: str) -> CollectionSearch: 

358 # Docstring inherited from lsst.daf.butler.registry.Registry 

359 record = self._managers.collections.find(parent) 

360 if record.type is not CollectionType.CHAINED: 

361 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

362 assert isinstance(record, ChainedCollectionRecord) 

363 return record.children 

364 

365 @transactional 

366 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

367 # Docstring inherited from lsst.daf.butler.registry.Registry 

368 record = self._managers.collections.find(parent) 

369 if record.type is not CollectionType.CHAINED: 

370 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

371 assert isinstance(record, ChainedCollectionRecord) 

372 children = CollectionSearch.fromExpression(children) 

373 if children != record.children or flatten: 

374 record.update(self._managers.collections, children, flatten=flatten) 

375 
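# Editor's illustrative sketch (not part of sql.py): defining a CHAINED
# collection and setting its child search order.  Assumes ``registry`` is a
# SqlRegistry instance; all collection names are hypothetical.
from lsst.daf.butler.registry import CollectionType

registry.registerRun("u/someone/run1")
registry.registerCollection("u/someone/calib", CollectionType.CALIBRATION)
registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
registry.setCollectionChain("u/someone/chain", ["u/someone/run1", "u/someone/calib"])
print(registry.getCollectionChain("u/someone/chain"))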

376 def getCollectionParentChains(self, collection: str) -> Set[str]: 

377 # Docstring inherited from lsst.daf.butler.registry.Registry 

378 return { 

379 record.name 

380 for record in self._managers.collections.getParentChains( 

381 self._managers.collections.find(collection).key 

382 ) 

383 } 

384 

385 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

386 # Docstring inherited from lsst.daf.butler.registry.Registry 

387 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

388 

389 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

390 # Docstring inherited from lsst.daf.butler.registry.Registry 

391 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

392 

393 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

394 # Docstring inherited from lsst.daf.butler.registry.Registry 

395 record = self._managers.collections.find(collection) 

396 return self._managers.datasets.getCollectionSummary(record) 

397 

398 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

399 # Docstring inherited from lsst.daf.butler.registry.Registry 

400 _, inserted = self._managers.datasets.register(datasetType) 

401 return inserted 

402 

403 def removeDatasetType(self, name: str) -> None: 

404 # Docstring inherited from lsst.daf.butler.registry.Registry 

405 self._managers.datasets.remove(name) 

406 

407 def getDatasetType(self, name: str) -> DatasetType: 

408 # Docstring inherited from lsst.daf.butler.registry.Registry 

409 return self._managers.datasets[name].datasetType 

410 

411 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

412 # Docstring inherited from lsst.daf.butler.registry.Registry 

413 return self._managers.datasets.supportsIdGenerationMode(mode) 

414 

415 def findDataset( 

416 self, 

417 datasetType: Union[DatasetType, str], 

418 dataId: Optional[DataId] = None, 

419 *, 

420 collections: Any = None, 

421 timespan: Optional[Timespan] = None, 

422 **kwargs: Any, 

423 ) -> Optional[DatasetRef]: 

424 # Docstring inherited from lsst.daf.butler.registry.Registry 

425 if isinstance(datasetType, DatasetType): 

426 storage = self._managers.datasets[datasetType.name] 

427 else: 

428 storage = self._managers.datasets[datasetType] 

429 dataId = DataCoordinate.standardize( 

430 dataId, 

431 graph=storage.datasetType.dimensions, 

432 universe=self.dimensions, 

433 defaults=self.defaults.dataId, 

434 **kwargs, 

435 ) 

436 if collections is None: 

437 if not self.defaults.collections: 

438 raise NoDefaultCollectionError( 

439 "No collections provided to findDataset, and no defaults from registry construction." 

440 ) 

441 collections = self.defaults.collections 

442 else: 

443 collections = CollectionSearch.fromExpression(collections) 

444 for collectionRecord in collections.iter(self._managers.collections): 

445 if collectionRecord.type is CollectionType.CALIBRATION and ( 

446 not storage.datasetType.isCalibration() or timespan is None 

447 ): 

448 continue 

449 result = storage.find(collectionRecord, dataId, timespan=timespan) 

450 if result is not None: 

451 return result 

452 

453 return None 

454 
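# Editor's illustrative sketch (not part of sql.py): resolving a single dataset
# by dataset type and data ID.  Assumes ``registry`` is a SqlRegistry instance;
# the dataset type, data ID values, and collection name are hypothetical.
ref = registry.findDataset(
    "calexp",
    collections=["u/someone/run1"],
    instrument="HSC",
    visit=903334,
    detector=16,
)
if ref is not None:
    print(ref.id, ref.run)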

455 @transactional 

456 def insertDatasets( 

457 self, 

458 datasetType: Union[DatasetType, str], 

459 dataIds: Iterable[DataId], 

460 run: Optional[str] = None, 

461 expand: bool = True, 

462 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

463 ) -> List[DatasetRef]: 

464 # Docstring inherited from lsst.daf.butler.registry.Registry 

465 if isinstance(datasetType, DatasetType): 

466 storage = self._managers.datasets.find(datasetType.name) 

467 if storage is None: 

468 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

469 else: 

470 storage = self._managers.datasets.find(datasetType) 

471 if storage is None: 

472 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

473 if run is None: 

474 if self.defaults.run is None: 

475 raise NoDefaultCollectionError( 

476 "No run provided to insertDatasets, and no default from registry construction." 

477 ) 

478 run = self.defaults.run 

479 runRecord = self._managers.collections.find(run) 

480 if runRecord.type is not CollectionType.RUN: 

481 raise CollectionTypeError( 

482 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

483 ) 

484 assert isinstance(runRecord, RunRecord) 

485 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

486 if expand: 

487 expandedDataIds = [ 

488 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

489 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

490 ] 

491 else: 

492 expandedDataIds = [ 

493 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

494 ] 

495 try: 

496 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

497 except sqlalchemy.exc.IntegrityError as err: 

498 raise ConflictingDefinitionError( 

499 f"A database constraint failure was triggered by inserting " 

500 f"one or more datasets of type {storage.datasetType} into " 

501 f"collection '{run}'. " 

502 f"This probably means a dataset with the same data ID " 

503 f"and dataset type already exists, but it may also mean a " 

504 f"dimension row is missing." 

505 ) from err 

506 return refs 

507 
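# Editor's illustrative sketch (not part of sql.py): registering a dataset type
# and inserting datasets for it into a RUN collection.  Assumes ``registry`` is
# a SqlRegistry instance; the dataset type name, dimensions, storage class, and
# data IDs are hypothetical.
from lsst.daf.butler import DatasetType

datasetType = DatasetType(
    "example_metric",
    dimensions=["instrument", "visit"],
    storageClass="StructuredDataDict",
    universe=registry.dimensions,
)
registry.registerDatasetType(datasetType)
registry.registerRun("u/someone/run1")
refs = registry.insertDatasets(
    datasetType,
    dataIds=[{"instrument": "HSC", "visit": 903334}],
    run="u/someone/run1",
)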

508 @transactional 

509 def _importDatasets( 

510 self, 

511 datasets: Iterable[DatasetRef], 

512 expand: bool = True, 

513 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

514 reuseIds: bool = False, 

515 ) -> List[DatasetRef]: 

516 # Docstring inherited from lsst.daf.butler.registry.Registry 

517 datasets = list(datasets) 

518 if not datasets: 

519 # nothing to do 

520 return [] 

521 

522 # find dataset type 

523 datasetTypes = set(dataset.datasetType for dataset in datasets) 

524 if len(datasetTypes) != 1: 

525 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

526 datasetType = datasetTypes.pop() 

527 

528 # get storage handler for this dataset type 

529 storage = self._managers.datasets.find(datasetType.name) 

530 if storage is None: 

531 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

532 

533 # find run name 

534 runs = set(dataset.run for dataset in datasets) 

535 if len(runs) != 1: 

536 raise ValueError(f"Multiple run names in input datasets: {runs}") 

537 run = runs.pop() 

538 if run is None: 

539 if self.defaults.run is None: 

540 raise NoDefaultCollectionError( 

541 "No run provided to ingestDatasets, and no default from registry construction." 

542 ) 

543 run = self.defaults.run 

544 

545 runRecord = self._managers.collections.find(run) 

546 if runRecord.type is not CollectionType.RUN: 

547 raise CollectionTypeError( 

548 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

549 " RUN collection required." 

550 ) 

551 assert isinstance(runRecord, RunRecord) 

552 

553 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

554 if expand: 

555 expandedDatasets = [ 

556 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

557 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

558 ] 

559 else: 

560 expandedDatasets = [ 

561 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

562 for dataset in datasets 

563 ] 

564 

565 try: 

566 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

567 except sqlalchemy.exc.IntegrityError as err: 

568 raise ConflictingDefinitionError( 

569 f"A database constraint failure was triggered by inserting " 

570 f"one or more datasets of type {storage.datasetType} into " 

571 f"collection '{run}'. " 

572 f"This probably means a dataset with the same data ID " 

573 f"and dataset type already exists, but it may also mean a " 

574 f"dimension row is missing." 

575 ) from err 

576 return refs 

577 

578 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

579 # Docstring inherited from lsst.daf.butler.registry.Registry 

580 return self._managers.datasets.getDatasetRef(id) 

581 

582 @transactional 

583 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

584 # Docstring inherited from lsst.daf.butler.registry.Registry 

585 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

586 for datasetType, refsForType in progress.iter_item_chunks( 

587 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

588 ): 

589 storage = self._managers.datasets[datasetType.name] 

590 try: 

591 storage.delete(refsForType) 

592 except sqlalchemy.exc.IntegrityError as err: 

593 raise OrphanedRecordError( 

594 "One or more datasets is still present in one or more Datastores." 

595 ) from err 

596 

597 @transactional 

598 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

599 # Docstring inherited from lsst.daf.butler.registry.Registry 

600 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

601 collectionRecord = self._managers.collections.find(collection) 

602 if collectionRecord.type is not CollectionType.TAGGED: 

603 raise CollectionTypeError( 

604 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

605 ) 

606 for datasetType, refsForType in progress.iter_item_chunks( 

607 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

608 ): 

609 storage = self._managers.datasets[datasetType.name] 

610 try: 

611 storage.associate(collectionRecord, refsForType) 

612 except sqlalchemy.exc.IntegrityError as err: 

613 raise ConflictingDefinitionError( 

614 f"Constraint violation while associating dataset of type {datasetType.name} with " 

615 f"collection {collection}. This probably means that one or more datasets with the same " 

616 f"dataset type and data ID already exist in the collection, but it may also indicate " 

617 f"that the datasets do not exist." 

618 ) from err 

619 

620 @transactional 

621 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

622 # Docstring inherited from lsst.daf.butler.registry.Registry 

623 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

624 collectionRecord = self._managers.collections.find(collection) 

625 if collectionRecord.type is not CollectionType.TAGGED: 

626 raise CollectionTypeError( 

627 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

628 ) 

629 for datasetType, refsForType in progress.iter_item_chunks( 

630 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

631 ): 

632 storage = self._managers.datasets[datasetType.name] 

633 storage.disassociate(collectionRecord, refsForType) 

634 

635 @transactional 

636 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

637 # Docstring inherited from lsst.daf.butler.registry.Registry 

638 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

639 collectionRecord = self._managers.collections.find(collection) 

640 for datasetType, refsForType in progress.iter_item_chunks( 

641 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

642 ): 

643 storage = self._managers.datasets[datasetType.name] 

644 storage.certify(collectionRecord, refsForType, timespan) 

645 

646 @transactional 

647 def decertify( 

648 self, 

649 collection: str, 

650 datasetType: Union[str, DatasetType], 

651 timespan: Timespan, 

652 *, 

653 dataIds: Optional[Iterable[DataId]] = None, 

654 ) -> None: 

655 # Docstring inherited from lsst.daf.butler.registry.Registry 

656 collectionRecord = self._managers.collections.find(collection) 

657 if isinstance(datasetType, str): 

658 storage = self._managers.datasets[datasetType] 

659 else: 

660 storage = self._managers.datasets[datasetType.name] 

661 standardizedDataIds = None 

662 if dataIds is not None: 

663 standardizedDataIds = [ 

664 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

665 ] 

666 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

667 
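# Editor's illustrative sketch (not part of sql.py): certifying datasets into a
# CALIBRATION collection over an unbounded validity range, then decertifying
# that dataset type again.  Assumes ``registry`` and ``refs`` (resolved
# DatasetRefs of a calibration dataset type) already exist; names are
# hypothetical.
from lsst.daf.butler import Timespan
from lsst.daf.butler.registry import CollectionType

registry.registerCollection("u/someone/calib", CollectionType.CALIBRATION)
registry.certify("u/someone/calib", refs, Timespan(begin=None, end=None))
registry.decertify("u/someone/calib", "example_flat", Timespan(begin=None, end=None))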

668 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

669 """Return an object that allows a new `Datastore` instance to 

670 communicate with this `Registry`. 

671 

672 Returns 

673 ------- 

674 manager : `DatastoreRegistryBridgeManager` 

675 Object that mediates communication between this `Registry` and its 

676 associated datastores. 

677 """ 

678 return self._managers.datastores 

679 

680 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

681 # Docstring inherited from lsst.daf.butler.registry.Registry 

682 return self._managers.datastores.findDatastores(ref) 

683 

684 def expandDataId( 

685 self, 

686 dataId: Optional[DataId] = None, 

687 *, 

688 graph: Optional[DimensionGraph] = None, 

689 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

690 withDefaults: bool = True, 

691 **kwargs: Any, 

692 ) -> DataCoordinate: 

693 # Docstring inherited from lsst.daf.butler.registry.Registry 

694 if not withDefaults: 

695 defaults = None 

696 else: 

697 defaults = self.defaults.dataId 

698 try: 

699 standardized = DataCoordinate.standardize( 

700 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

701 ) 

702 except KeyError as exc: 

703 # This means either kwargs have some odd name or required 

704 # dimension is missing. 

705 raise DimensionNameError(str(exc)) from exc 

706 if standardized.hasRecords(): 

707 return standardized 

708 if records is None: 

709 records = {} 

710 elif isinstance(records, NamedKeyMapping): 

711 records = records.byName() 

712 else: 

713 records = dict(records) 

714 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

715 records.update(dataId.records.byName()) 

716 keys = standardized.byName() 

717 for element in standardized.graph.primaryKeyTraversalOrder: 

718 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

719 if record is ...: 

720 if isinstance(element, Dimension) and keys.get(element.name) is None: 

721 if element in standardized.graph.required: 

722 raise DimensionNameError( 

723 f"No value or null value for required dimension {element.name}." 

724 ) 

725 keys[element.name] = None 

726 record = None 

727 else: 

728 storage = self._managers.dimensions[element] 

729 dataIdSet = DataCoordinateIterable.fromScalar( 

730 DataCoordinate.standardize(keys, graph=element.graph) 

731 ) 

732 fetched = tuple(storage.fetch(dataIdSet)) 

733 try: 

734 (record,) = fetched 

735 except ValueError: 

736 record = None 

737 records[element.name] = record 

738 if record is not None: 

739 for d in element.implied: 

740 value = getattr(record, d.name) 

741 if keys.setdefault(d.name, value) != value: 

742 raise InconsistentDataIdError( 

743 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

744 f"but {element.name} implies {d.name}={value!r}." 

745 ) 

746 else: 

747 if element in standardized.graph.required: 

748 raise DataIdValueError( 

749 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

750 ) 

751 if element.alwaysJoin: 

752 raise InconsistentDataIdError( 

753 f"Could not fetch record for element {element.name} via keys {keys}, ", 

754 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

755 "related.", 

756 ) 

757 for d in element.implied: 

758 keys.setdefault(d.name, None) 

759 records.setdefault(d.name, None) 

760 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

761 
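# Editor's illustrative sketch (not part of sql.py): expanding a minimal data ID
# so that it carries full dimension records (including implied dimension
# values).  Assumes ``registry`` is a SqlRegistry instance; the key/value pairs
# are hypothetical.
dataId = registry.expandDataId(instrument="HSC", exposure=903334)
print(dataId.records["exposure"])  # the DimensionRecord fetched from the database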

762 def insertDimensionData( 

763 self, 

764 element: Union[DimensionElement, str], 

765 *data: Union[Mapping[str, Any], DimensionRecord], 

766 conform: bool = True, 

767 replace: bool = False, 

768 skip_existing: bool = False, 

769 ) -> None: 

770 # Docstring inherited from lsst.daf.butler.registry.Registry 

771 if conform: 

772 if isinstance(element, str): 

773 element = self.dimensions[element] 

774 records = [ 

775 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

776 ] 

777 else: 

778 # Ignore typing since caller said to trust them with conform=False. 

779 records = data # type: ignore 

780 storage = self._managers.dimensions[element] # type: ignore 

781 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

782 

783 def syncDimensionData( 

784 self, 

785 element: Union[DimensionElement, str], 

786 row: Union[Mapping[str, Any], DimensionRecord], 

787 conform: bool = True, 

788 update: bool = False, 

789 ) -> Union[bool, Dict[str, Any]]: 

790 # Docstring inherited from lsst.daf.butler.registry.Registry 

791 if conform: 

792 if isinstance(element, str): 

793 element = self.dimensions[element] 

794 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

795 else: 

796 # Ignore typing since caller said to trust them with conform=False. 

797 record = row # type: ignore 

798 storage = self._managers.dimensions[element] # type: ignore 

799 return storage.sync(record, update=update) 

800 

801 def queryDatasetTypes( 

802 self, 

803 expression: Any = ..., 

804 *, 

805 components: Optional[bool] = None, 

806 missing: Optional[List[str]] = None, 

807 ) -> Iterator[DatasetType]: 

808 # Docstring inherited from lsst.daf.butler.registry.Registry 

809 try: 

810 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

811 except TypeError as exc: 

812 raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc 

813 unknownComponentsMessage = ( 

814 "Could not find definition for storage class %s for dataset type %r;" 

815 " if it has components they will not be included in dataset type query results." 

816 ) 

817 if wildcard is Ellipsis: 

818 for datasetType in self._managers.datasets: 

819 # The dataset type can no longer be a component 

820 yield datasetType 

821 if components: 

822 # Automatically create the component dataset types 

823 try: 

824 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

825 except KeyError as err: 

826 _LOG.warning(unknownComponentsMessage, err, datasetType.name) 

827 else: 

828 yield from componentsForDatasetType 

829 return 

830 done: Set[str] = set() 

831 for name in wildcard.strings: 

832 storage = self._managers.datasets.find(name) 

833 done.add(name) 

834 if storage is None: 

835 if missing is not None: 

836 missing.append(name) 

837 else: 

838 yield storage.datasetType 

839 if wildcard.patterns: 

840 # If components (the argument) is None, we'll save component 

841 # datasets that we might want to match, but only if their parents 

842 # didn't get included. 

843 componentsForLater = [] 

844 for registeredDatasetType in self._managers.datasets: 

845 # Components are not stored in registry so expand them here 

846 allDatasetTypes = [registeredDatasetType] 

847 if components is not False: 

848 # Only check for the components if we are being asked 

849 # for components or components is None. 

850 try: 

851 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

852 except KeyError as err: 

853 _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name) 

854 for datasetType in allDatasetTypes: 

855 if datasetType.name in done: 

856 continue 

857 parentName, componentName = datasetType.nameAndComponent() 

858 if componentName is not None and not components: 

859 if components is None and parentName not in done: 

860 componentsForLater.append(datasetType) 

861 continue 

862 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

863 done.add(datasetType.name) 

864 yield datasetType 

865 # Go back and try to match saved components. 

866 for datasetType in componentsForLater: 

867 parentName, _ = datasetType.nameAndComponent() 

868 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

869 yield datasetType 

870 

871 def queryCollections( 

872 self, 

873 expression: Any = ..., 

874 datasetType: Optional[DatasetType] = None, 

875 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

876 flattenChains: bool = False, 

877 includeChains: Optional[bool] = None, 

878 ) -> Iterator[str]: 

879 # Docstring inherited from lsst.daf.butler.registry.Registry 

880 

881 # Right now the datasetType argument is completely ignored, but that 

882 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

883 # ticket will take care of that. 

884 try: 

885 query = CollectionQuery.fromExpression(expression) 

886 except TypeError as exc: 

887 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

888 collectionTypes = ensure_iterable(collectionTypes) 

889 for record in query.iter( 

890 self._managers.collections, 

891 collectionTypes=frozenset(collectionTypes), 

892 flattenChains=flattenChains, 

893 includeChains=includeChains, 

894 ): 

895 yield record.name 

896 

897 def _makeQueryBuilder( 

898 self, summary: queries.QuerySummary, doomed_by: Iterable[str] = () 

899 ) -> queries.QueryBuilder: 

900 """Return a `QueryBuilder` instance capable of constructing and 

901 managing more complex queries than those obtainable via `Registry` 

902 interfaces. 

903 

904 This is an advanced interface; downstream code should prefer 

905 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

906 are sufficient. 

907 

908 Parameters 

909 ---------- 

910 summary : `queries.QuerySummary` 

911 Object describing and categorizing the full set of dimensions that 

912 will be included in the query. 

913 doomed_by : `Iterable` of `str`, optional 

914 A list of diagnostic messages that indicate why the query is going 

915 to yield no results and should not even be executed. If an empty 

916 container (default) the query will be executed unless other code 

917 determines that it is doomed. 

918 

919 Returns 

920 ------- 

921 builder : `queries.QueryBuilder` 

922 Object that can be used to construct and perform advanced queries. 

923 """ 

924 return queries.QueryBuilder( 

925 summary, 

926 queries.RegistryManagers( 

927 collections=self._managers.collections, 

928 dimensions=self._managers.dimensions, 

929 datasets=self._managers.datasets, 

930 TimespanReprClass=self._db.getTimespanRepresentation(), 

931 ), 

932 doomed_by=doomed_by, 

933 ) 

934 

935 def queryDatasets( 

936 self, 

937 datasetType: Any, 

938 *, 

939 collections: Any = None, 

940 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

941 dataId: Optional[DataId] = None, 

942 where: Optional[str] = None, 

943 findFirst: bool = False, 

944 components: Optional[bool] = None, 

945 bind: Optional[Mapping[str, Any]] = None, 

946 check: bool = True, 

947 **kwargs: Any, 

948 ) -> queries.DatasetQueryResults: 

949 # Docstring inherited from lsst.daf.butler.registry.Registry 

950 

951 # Standardize the collections expression. 

952 if collections is None: 

953 if not self.defaults.collections: 

954 raise NoDefaultCollectionError( 

955 "No collections provided to findDataset, and no defaults from registry construction." 

956 ) 

957 collections = self.defaults.collections 

958 elif findFirst: 

959 collections = CollectionSearch.fromExpression(collections) 

960 else: 

961 collections = CollectionQuery.fromExpression(collections) 

962 # Standardize and expand the data ID provided as a constraint. 

963 standardizedDataId = self.expandDataId(dataId, **kwargs) 

964 

965 # We can only query directly if given a non-component DatasetType 

966 # instance. If we were given an expression or str or a component 

967 # DatasetType instance, we'll populate this dict, recurse, and return. 

968 # If we already have a non-component DatasetType, it will remain None 

969 # and we'll run the query directly. 

970 composition: Optional[ 

971 Dict[ 

972 DatasetType, List[Optional[str]] # parent dataset type # component name, or None for parent 

973 ] 

974 ] = None 

975 if not isinstance(datasetType, DatasetType): 

976 # We were given a dataset type expression (which may be as simple 

977 # as a str). Loop over all matching dataset types, delegating handling 

978 # of the `components` argument to queryDatasetTypes, as we populate 

979 # the composition dict. 

980 composition = defaultdict(list) 

981 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

982 parentName, componentName = trueDatasetType.nameAndComponent() 

983 if componentName is not None: 

984 parentDatasetType = self.getDatasetType(parentName) 

985 composition.setdefault(parentDatasetType, []).append(componentName) 

986 else: 

987 composition.setdefault(trueDatasetType, []).append(None) 

988 if not composition: 

989 return queries.ChainedDatasetQueryResults( 

990 [], 

991 doomed_by=[ 

992 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

993 "exist in any collection." 

994 for t in ensure_iterable(datasetType) 

995 ], 

996 ) 

997 elif datasetType.isComponent(): 

998 # We were given a true DatasetType instance, but it's a component. 

999 # The composition dict will have exactly one item. 

1000 parentName, componentName = datasetType.nameAndComponent() 

1001 parentDatasetType = self.getDatasetType(parentName) 

1002 composition = {parentDatasetType: [componentName]} 

1003 if composition is not None: 

1004 # We need to recurse. Do that once for each parent dataset type. 

1005 chain = [] 

1006 for parentDatasetType, componentNames in composition.items(): 

1007 parentResults = self.queryDatasets( 

1008 parentDatasetType, 

1009 collections=collections, 

1010 dimensions=dimensions, 

1011 dataId=standardizedDataId, 

1012 where=where, 

1013 bind=bind, 

1014 findFirst=findFirst, 

1015 check=check, 

1016 ) 

1017 assert isinstance( 

1018 parentResults, queries.ParentDatasetQueryResults 

1019 ), "Should always be true if passing in a DatasetType instance, and we are." 

1020 chain.append(parentResults.withComponents(componentNames)) 

1021 return queries.ChainedDatasetQueryResults(chain) 

1022 # If we get here, there's no need to recurse (or we are already 

1023 # recursing; there can only ever be one level of recursion). 

1024 

1025 # The full set of dimensions in the query is the combination of those 

1026 # needed for the DatasetType and those explicitly requested, if any. 

1027 requestedDimensionNames = set(datasetType.dimensions.names) 

1028 if dimensions is not None: 

1029 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1030 # Construct the summary structure needed to construct a QueryBuilder. 

1031 summary = queries.QuerySummary( 

1032 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1033 dataId=standardizedDataId, 

1034 expression=where, 

1035 bind=bind, 

1036 defaults=self.defaults.dataId, 

1037 check=check, 

1038 datasets=[datasetType], 

1039 ) 

1040 builder = self._makeQueryBuilder(summary) 

1041 # Add the dataset subquery to the query, telling the QueryBuilder to 

1042 # include the rank of the selected collection in the results only if we 

1043 # need to findFirst. Note that if any of the collections are 

1044 # actually wildcard expressions, and we've asked for deduplication, 

1045 # this will raise TypeError for us. 

1046 builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst) 

1047 query = builder.finish() 

1048 return queries.ParentDatasetQueryResults(self._db, query, components=[None], datasetType=datasetType) 

1049 
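# Editor's illustrative sketch (not part of sql.py): querying datasets with a
# string ``where`` expression and bind parameters.  Assumes ``registry`` is a
# SqlRegistry instance; dataset type, collection, and constraint values are
# hypothetical.
refs = registry.queryDatasets(
    "calexp",
    collections=["u/someone/run1"],
    where="instrument = 'HSC' AND visit > min_visit",
    bind={"min_visit": 900000},
    findFirst=True,
)
for ref in refs:
    print(ref.dataId, ref.run)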

1050 def queryDataIds( 

1051 self, 

1052 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1053 *, 

1054 dataId: Optional[DataId] = None, 

1055 datasets: Any = None, 

1056 collections: Any = None, 

1057 where: Optional[str] = None, 

1058 components: Optional[bool] = None, 

1059 bind: Optional[Mapping[str, Any]] = None, 

1060 check: bool = True, 

1061 **kwargs: Any, 

1062 ) -> queries.DataCoordinateQueryResults: 

1063 # Docstring inherited from lsst.daf.butler.registry.Registry 

1064 dimensions = ensure_iterable(dimensions) 

1065 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1066 standardizedDatasetTypes = set() 

1067 requestedDimensions = self.dimensions.extract(dimensions) 

1068 missing: List[str] = [] 

1069 if datasets is not None: 

1070 if not collections: 

1071 if not self.defaults.collections: 

1072 raise NoDefaultCollectionError( 

1073 f"Cannot pass 'datasets' (='{datasets}') without 'collections'." 

1074 ) 

1075 collections = self.defaults.collections 

1076 else: 

1077 # Preprocess collections expression in case the original 

1078 # included single-pass iterators (we'll want to use it multiple 

1079 # times below). 

1080 collections = CollectionQuery.fromExpression(collections) 

1081 for datasetType in self.queryDatasetTypes(datasets, components=components, missing=missing): 

1082 # If any matched dataset type is a component, just operate on 

1083 # its parent instead, because Registry doesn't know anything 

1084 # about what components exist, and here (unlike queryDatasets) 

1085 # we don't care about returning them. 

1086 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1087 if componentName is not None: 

1088 datasetType = self.getDatasetType(parentDatasetTypeName) 

1089 standardizedDatasetTypes.add(datasetType) 

1090 elif collections: 

1091 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1092 

1093 def query_factory( 

1094 order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None 

1095 ) -> Query: 

1096 """Construct the Query object that generates query results.""" 

1097 summary = queries.QuerySummary( 

1098 requested=requestedDimensions, 

1099 dataId=standardizedDataId, 

1100 expression=where, 

1101 bind=bind, 

1102 defaults=self.defaults.dataId, 

1103 check=check, 

1104 datasets=standardizedDatasetTypes, 

1105 order_by=order_by, 

1106 limit=limit, 

1107 ) 

1108 builder = self._makeQueryBuilder( 

1109 summary, doomed_by=[f"Dataset type {name} is not registered." for name in missing] 

1110 ) 

1111 for datasetType in standardizedDatasetTypes: 

1112 builder.joinDataset( 

1113 datasetType, 

1114 collections, 

1115 isResult=False, 

1116 ) 

1117 return builder.finish() 

1118 

1119 return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions) 

1120 
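# Editor's illustrative sketch (not part of sql.py): querying data IDs
# constrained by the existence of datasets in a collection.  Assumes
# ``registry`` is a SqlRegistry instance; names and values are hypothetical.
dataIds = registry.queryDataIds(
    ["visit", "detector"],
    datasets="calexp",
    collections=["u/someone/run1"],
    instrument="HSC",
)
for dataId in dataIds:
    print(dataId)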

1121 def queryDimensionRecords( 

1122 self, 

1123 element: Union[DimensionElement, str], 

1124 *, 

1125 dataId: Optional[DataId] = None, 

1126 datasets: Any = None, 

1127 collections: Any = None, 

1128 where: Optional[str] = None, 

1129 components: Optional[bool] = None, 

1130 bind: Optional[Mapping[str, Any]] = None, 

1131 check: bool = True, 

1132 **kwargs: Any, 

1133 ) -> queries.DimensionRecordQueryResults: 

1134 # Docstring inherited from lsst.daf.butler.registry.Registry 

1135 if not isinstance(element, DimensionElement): 

1136 try: 

1137 element = self.dimensions[element] 

1138 except KeyError as e: 

1139 raise DimensionNameError( 

1140 f"No such dimension '{element}', available dimensions: " 

1141 + str(self.dimensions.getStaticElements()) 

1142 ) from e 

1143 dataIds = self.queryDataIds( 

1144 element.graph, 

1145 dataId=dataId, 

1146 datasets=datasets, 

1147 collections=collections, 

1148 where=where, 

1149 components=components, 

1150 bind=bind, 

1151 check=check, 

1152 **kwargs, 

1153 ) 

1154 return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element]) 

1155 

1156 def queryDatasetAssociations( 

1157 self, 

1158 datasetType: Union[str, DatasetType], 

1159 collections: Any = ..., 

1160 *, 

1161 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1162 flattenChains: bool = False, 

1163 ) -> Iterator[DatasetAssociation]: 

1164 # Docstring inherited from lsst.daf.butler.registry.Registry 

1165 if collections is None: 

1166 if not self.defaults.collections: 

1167 raise NoDefaultCollectionError( 

1168 "No collections provided to findDataset, and no defaults from registry construction." 

1169 ) 

1170 collections = self.defaults.collections 

1171 else: 

1172 collections = CollectionQuery.fromExpression(collections) 

1173 TimespanReprClass = self._db.getTimespanRepresentation() 

1174 if isinstance(datasetType, str): 

1175 storage = self._managers.datasets[datasetType] 

1176 else: 

1177 storage = self._managers.datasets[datasetType.name] 

1178 for collectionRecord in collections.iter( 

1179 self._managers.collections, 

1180 collectionTypes=frozenset(collectionTypes), 

1181 flattenChains=flattenChains, 

1182 ): 

1183 query = storage.select(collectionRecord) 

1184 with self._db.query(query) as sql_result: 

1185 sql_mappings = sql_result.mappings().fetchall() 

1186 for row in sql_mappings: 

1187 dataId = DataCoordinate.fromRequiredValues( 

1188 storage.datasetType.dimensions, 

1189 tuple(row[name] for name in storage.datasetType.dimensions.required.names), 

1190 ) 

1191 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1192 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False) 

1193 if collectionRecord.type is CollectionType.CALIBRATION: 

1194 timespan = TimespanReprClass.extract(row) 

1195 else: 

1196 timespan = None 

1197 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1198 

1199 storageClasses: StorageClassFactory 

1200 """All storage classes known to the registry (`StorageClassFactory`). 

1201 """