Coverage for python/lsst/daf/butler/registries/sql.py: 13%

469 statements  

coverage.py v6.4.4, created at 2022-09-27 08:58 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28from collections import defaultdict 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Tuple, 

41 Union, 

42) 

43 

44import sqlalchemy 

45from lsst.resources import ResourcePathExpression 

46from lsst.utils.iteration import ensure_iterable 

47 

48from ..core import ( 

49 Config, 

50 DataCoordinate, 

51 DataCoordinateIterable, 

52 DataId, 

53 DatasetAssociation, 

54 DatasetId, 

55 DatasetRef, 

56 DatasetType, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionRecord, 

62 DimensionUniverse, 

63 NamedKeyMapping, 

64 NameLookupMapping, 

65 Progress, 

66 StorageClassFactory, 

67 Timespan, 

68 ddl, 

69) 

70from ..core.utils import transactional 

71from ..registry import ( 

72 ArgumentError, 

73 CollectionExpressionError, 

74 CollectionSearch, 

75 CollectionSummary, 

76 CollectionType, 

77 CollectionTypeError, 

78 ConflictingDefinitionError, 

79 DataIdValueError, 

80 DatasetTypeError, 

81 DatasetTypeExpressionError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CategorizedWildcard, CollectionQuery, Ellipsis 

94 

95if TYPE_CHECKING: 95 ↛ 96 (line 95 didn't jump to line 96, because the condition on line 95 was never true)

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance in which the registry data is stored.

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified.

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes database contents; the database must be empty

132 prior to calling this method. 

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will

138 be loaded from ``registry.yaml``.

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimension configuration; if missing, the default configuration

141 will be loaded from ``dimensions.yaml``.

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig)

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 
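A minimal usage sketch (editor-added, not part of the listed source): creating a new repository registry backed by a transient SQLite database, with the default registry and dimension configurations. The connection string is illustrative.

    from lsst.daf.butler.registries.sql import SqlRegistry
    from lsst.daf.butler.registry import RegistryConfig

    config = RegistryConfig()      # defaults loaded from registry.yaml
    config["db"] = "sqlite://"     # illustrative: transient in-memory database
    registry = SqlRegistry.createFromConfig(config)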

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration.

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 str(config.connectionString), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 managers = managerTypes.loadRepo(database) 

208 if defaults is None: 

209 defaults = RegistryDefaults() 

210 return cls(database, defaults, managers) 

211 
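A companion sketch for opening an already-initialized repository read-only; the database path and default collection names are placeholders.

    from lsst.daf.butler.registries.sql import SqlRegistry
    from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults

    config = RegistryConfig()
    config["db"] = "sqlite:///path/to/repo/gen3.sqlite3"   # placeholder path
    registry = SqlRegistry.fromConfig(
        config,
        writeable=False,
        defaults=RegistryDefaults(collections=["u/example/defaults"]),  # placeholder name
    )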

212 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

213 self._db = database 

214 self._managers = managers 

215 self.storageClasses = StorageClassFactory() 

216 # Intentionally invoke property setter to initialize defaults. This 

217 # can only be done after most of the rest of Registry has already been 

218 # initialized, and must be done before the property getter is used. 

219 self.defaults = defaults 

220 # In the future DatasetIdFactory may become configurable and this 

221 # instance will need to be shared with datasets manager. 

222 self.datasetIdFactory = DatasetIdFactory() 

223 

224 def __str__(self) -> str: 

225 return str(self._db) 

226 

227 def __repr__(self) -> str: 

228 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

229 

230 def isWriteable(self) -> bool: 

231 # Docstring inherited from lsst.daf.butler.registry.Registry 

232 return self._db.isWriteable() 

233 

234 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

235 # Docstring inherited from lsst.daf.butler.registry.Registry 

236 if defaults is None: 

237 # No need to copy, because `RegistryDefaults` is immutable; we 

238 # effectively copy on write. 

239 defaults = self.defaults 

240 return type(self)(self._db, defaults, self._managers) 

241 

242 @property 

243 def dimensions(self) -> DimensionUniverse: 

244 # Docstring inherited from lsst.daf.butler.registry.Registry 

245 return self._managers.dimensions.universe 

246 

247 def refresh(self) -> None: 

248 # Docstring inherited from lsst.daf.butler.registry.Registry 

249 self._managers.refresh() 

250 

251 @contextlib.contextmanager 

252 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

253 # Docstring inherited from lsst.daf.butler.registry.Registry 

254 try: 

255 with self._db.transaction(savepoint=savepoint): 

256 yield 

257 except BaseException: 

258 # TODO: this clears the caches sometimes when we wouldn't actually 

259 # need to. Can we avoid that? 

260 self._managers.dimensions.clearCaches() 

261 raise 

262 
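Illustrative use of the transaction context manager, assuming a writeable registry named ``registry``; all operations inside the block commit or roll back together.

    with registry.transaction(savepoint=True):
        registry.registerRun("u/example/run")
        registry.insertDimensionData("instrument", {"name": "DummyCam"})
        # Any exception raised here rolls back both operations above.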

263 def resetConnectionPool(self) -> None: 

264 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

265 

266 This operation is useful when using the registry with fork-based

267 multiprocessing. To use the registry across a fork boundary, one must

268 ensure that there are no currently active connections (no session or

269 transaction in progress) and that the connection pool is reset using this

270 method. The child process should call this method immediately

271 after the fork.

272 """ 

273 self._db._engine.dispose() 

274 
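A sketch of the fork pattern described above, assuming no session or transaction is open when the fork happens and that ``registry`` already exists.

    import multiprocessing

    def worker(registry):
        # First thing after the fork: discard connections inherited from the parent.
        registry.resetConnectionPool()
        # ... use the registry normally from here on ...

    ctx = multiprocessing.get_context("fork")
    proc = ctx.Process(target=worker, args=(registry,))
    proc.start()
    proc.join()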

275 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

276 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

277 other data repository client. 

278 

279 Opaque table records can be added via `insertOpaqueData`, retrieved via 

280 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

281 

282 Parameters 

283 ---------- 

284 tableName : `str` 

285 Logical name of the opaque table. This may differ from the 

286 actual name used in the database by a prefix and/or suffix. 

287 spec : `ddl.TableSpec` 

288 Specification for the table to be added. 

289 """ 

290 self._managers.opaque.register(tableName, spec) 

291 

292 @transactional 

293 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

294 """Insert records into an opaque table. 

295 

296 Parameters 

297 ---------- 

298 tableName : `str` 

299 Logical name of the opaque table. Must match the name used in a 

300 previous call to `registerOpaqueTable`. 

301 data 

302 Each additional positional argument is a dictionary that represents 

303 a single row to be added. 

304 """ 

305 self._managers.opaque[tableName].insert(*data) 

306 

307 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

308 """Retrieve records from an opaque table. 

309 

310 Parameters 

311 ---------- 

312 tableName : `str` 

313 Logical name of the opaque table. Must match the name used in a 

314 previous call to `registerOpaqueTable`. 

315 where 

316 Additional keyword arguments are interpreted as equality 

317 constraints that restrict the returned rows (combined with AND); 

318 keyword arguments are column names and values are the values they 

319 must have. 

320 

321 Yields 

322 ------ 

323 row : `dict` 

324 A dictionary representing a single result row. 

325 """ 

326 yield from self._managers.opaque[tableName].fetch(**where) 

327 

328 @transactional 

329 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

330 """Remove records from an opaque table. 

331 

332 Parameters 

333 ---------- 

334 tableName : `str` 

335 Logical name of the opaque table. Must match the name used in a 

336 previous call to `registerOpaqueTable`. 

337 where 

338 Additional keyword arguments are interpreted as equality 

339 constraints that restrict the deleted rows (combined with AND); 

340 keyword arguments are column names and values are the values they 

341 must have. 

342 """ 

343 self._managers.opaque[tableName].delete(where.keys(), where) 

344 
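The opaque-table methods above are normally used together; a minimal sketch with an invented table name and schema (not a real datastore table).

    import sqlalchemy
    from lsst.daf.butler.core import ddl

    spec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
            ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
        ]
    )
    registry.registerOpaqueTable("demo_opaque", spec)
    registry.insertOpaqueData("demo_opaque", {"id": 1, "path": "a/b/c.fits"})
    rows = list(registry.fetchOpaqueData("demo_opaque", id=1))
    registry.deleteOpaqueData("demo_opaque", id=1)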

345 def registerCollection( 

346 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

347 ) -> bool: 

348 # Docstring inherited from lsst.daf.butler.registry.Registry 

349 _, registered = self._managers.collections.register(name, type, doc=doc) 

350 return registered 

351 

352 def getCollectionType(self, name: str) -> CollectionType: 

353 # Docstring inherited from lsst.daf.butler.registry.Registry 

354 return self._managers.collections.find(name).type 

355 

356 def _get_collection_record(self, name: str) -> CollectionRecord: 

357 # Docstring inherited from lsst.daf.butler.registry.Registry 

358 return self._managers.collections.find(name) 

359 

360 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

361 # Docstring inherited from lsst.daf.butler.registry.Registry 

362 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

363 return registered 

364 

365 @transactional 

366 def removeCollection(self, name: str) -> None: 

367 # Docstring inherited from lsst.daf.butler.registry.Registry 

368 self._managers.collections.remove(name) 

369 

370 def getCollectionChain(self, parent: str) -> CollectionSearch: 

371 # Docstring inherited from lsst.daf.butler.registry.Registry 

372 record = self._managers.collections.find(parent) 

373 if record.type is not CollectionType.CHAINED: 

374 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

375 assert isinstance(record, ChainedCollectionRecord) 

376 return record.children 

377 

378 @transactional 

379 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

380 # Docstring inherited from lsst.daf.butler.registry.Registry 

381 record = self._managers.collections.find(parent) 

382 if record.type is not CollectionType.CHAINED: 

383 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

384 assert isinstance(record, ChainedCollectionRecord) 

385 children = CollectionSearch.fromExpression(children) 

386 if children != record.children or flatten: 

387 record.update(self._managers.collections, children, flatten=flatten) 

388 
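A sketch of building a CHAINED collection from two RUN collections; all collection names are placeholders.

    from lsst.daf.butler.registry import CollectionType

    registry.registerRun("u/example/run1")
    registry.registerRun("u/example/run2")
    registry.registerCollection("u/example/chain", CollectionType.CHAINED)
    registry.setCollectionChain("u/example/chain", ["u/example/run2", "u/example/run1"])
    children = registry.getCollectionChain("u/example/chain")  # search order as set above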

389 def getCollectionParentChains(self, collection: str) -> Set[str]: 

390 # Docstring inherited from lsst.daf.butler.registry.Registry 

391 return { 

392 record.name 

393 for record in self._managers.collections.getParentChains( 

394 self._managers.collections.find(collection).key 

395 ) 

396 } 

397 

398 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

399 # Docstring inherited from lsst.daf.butler.registry.Registry 

400 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

401 

402 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

403 # Docstring inherited from lsst.daf.butler.registry.Registry 

404 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

405 

406 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

407 # Docstring inherited from lsst.daf.butler.registry.Registry 

408 record = self._managers.collections.find(collection) 

409 return self._managers.datasets.getCollectionSummary(record) 

410 

411 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

412 # Docstring inherited from lsst.daf.butler.registry.Registry 

413 _, inserted = self._managers.datasets.register(datasetType) 

414 return inserted 

415 

416 def removeDatasetType(self, name: str) -> None: 

417 # Docstring inherited from lsst.daf.butler.registry.Registry 

418 self._managers.datasets.remove(name) 

419 

420 def getDatasetType(self, name: str) -> DatasetType: 

421 # Docstring inherited from lsst.daf.butler.registry.Registry 

422 return self._managers.datasets[name].datasetType 

423 

424 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

425 # Docstring inherited from lsst.daf.butler.registry.Registry 

426 return self._managers.datasets.supportsIdGenerationMode(mode) 

427 

428 def findDataset( 

429 self, 

430 datasetType: Union[DatasetType, str], 

431 dataId: Optional[DataId] = None, 

432 *, 

433 collections: Any = None, 

434 timespan: Optional[Timespan] = None, 

435 **kwargs: Any, 

436 ) -> Optional[DatasetRef]: 

437 # Docstring inherited from lsst.daf.butler.registry.Registry 

438 if isinstance(datasetType, DatasetType): 

439 storage = self._managers.datasets[datasetType.name] 

440 else: 

441 storage = self._managers.datasets[datasetType] 

442 dataId = DataCoordinate.standardize( 

443 dataId, 

444 graph=storage.datasetType.dimensions, 

445 universe=self.dimensions, 

446 defaults=self.defaults.dataId, 

447 **kwargs, 

448 ) 

449 if collections is None: 

450 if not self.defaults.collections: 

451 raise NoDefaultCollectionError( 

452 "No collections provided to findDataset, and no defaults from registry construction." 

453 ) 

454 collections = self.defaults.collections 

455 else: 

456 collections = CollectionSearch.fromExpression(collections) 

457 for collectionRecord in collections.iter(self._managers.collections): 

458 if collectionRecord.type is CollectionType.CALIBRATION and ( 

459 not storage.datasetType.isCalibration() or timespan is None 

460 ): 

461 continue 

462 result = storage.find(collectionRecord, dataId, timespan=timespan) 

463 if result is not None: 

464 return result 

465 

466 return None 

467 
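Illustrative lookups, including the timespan required when searching a CALIBRATION collection; dataset types, data ID values, and collections are placeholders.

    import astropy.time
    from lsst.daf.butler import Timespan

    # Ordinary lookup in a RUN or TAGGED collection:
    ref = registry.findDataset(
        "calexp", instrument="HSC", visit=903334, detector=22,
        collections=["HSC/runs/example"],
    )

    # Calibration lookup; CALIBRATION collections are skipped unless a timespan is given:
    valid_at = Timespan(
        astropy.time.Time("2022-01-01", scale="tai"),
        astropy.time.Time("2022-01-02", scale="tai"),
    )
    bias = registry.findDataset(
        "bias", instrument="HSC", detector=22,
        collections=["HSC/calib"], timespan=valid_at,
    )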

468 @transactional 

469 def insertDatasets( 

470 self, 

471 datasetType: Union[DatasetType, str], 

472 dataIds: Iterable[DataId], 

473 run: Optional[str] = None, 

474 expand: bool = True, 

475 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

476 ) -> List[DatasetRef]: 

477 # Docstring inherited from lsst.daf.butler.registry.Registry 

478 if isinstance(datasetType, DatasetType): 

479 storage = self._managers.datasets.find(datasetType.name) 

480 if storage is None: 

481 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

482 else: 

483 storage = self._managers.datasets.find(datasetType) 

484 if storage is None: 

485 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

486 if run is None: 

487 if self.defaults.run is None: 

488 raise NoDefaultCollectionError( 

489 "No run provided to insertDatasets, and no default from registry construction." 

490 ) 

491 run = self.defaults.run 

492 runRecord = self._managers.collections.find(run) 

493 if runRecord.type is not CollectionType.RUN: 

494 raise CollectionTypeError( 

495 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

496 ) 

497 assert isinstance(runRecord, RunRecord) 

498 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

499 if expand: 

500 expandedDataIds = [ 

501 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

502 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

503 ] 

504 else: 

505 expandedDataIds = [ 

506 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

507 ] 

508 try: 

509 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

510 except sqlalchemy.exc.IntegrityError as err: 

511 raise ConflictingDefinitionError( 

512 f"A database constraint failure was triggered by inserting " 

513 f"one or more datasets of type {storage.datasetType} into " 

514 f"collection '{run}'. " 

515 f"This probably means a dataset with the same data ID " 

516 f"and dataset type already exists, but it may also mean a " 

517 f"dimension row is missing." 

518 ) from err 

519 return refs 

520 
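A sketch that registers a hypothetical dataset type and inserts one dataset into a RUN collection, assuming the referenced dimension records already exist.

    from lsst.daf.butler import DatasetType

    dataset_type = DatasetType(
        "example_catalog",
        dimensions=["instrument", "visit"],
        storageClass="DataFrame",
        universe=registry.dimensions,
    )
    registry.registerDatasetType(dataset_type)
    registry.registerRun("u/example/run")
    (ref,) = registry.insertDatasets(
        dataset_type, [{"instrument": "HSC", "visit": 903334}], run="u/example/run"
    )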

521 @transactional 

522 def _importDatasets( 

523 self, 

524 datasets: Iterable[DatasetRef], 

525 expand: bool = True, 

526 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

527 reuseIds: bool = False, 

528 ) -> List[DatasetRef]: 

529 # Docstring inherited from lsst.daf.butler.registry.Registry 

530 datasets = list(datasets) 

531 if not datasets: 

532 # nothing to do 

533 return [] 

534 

535 # find dataset type 

536 datasetTypes = set(dataset.datasetType for dataset in datasets) 

537 if len(datasetTypes) != 1: 

538 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

539 datasetType = datasetTypes.pop() 

540 

541 # get storage handler for this dataset type 

542 storage = self._managers.datasets.find(datasetType.name) 

543 if storage is None: 

544 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

545 

546 # find run name 

547 runs = set(dataset.run for dataset in datasets) 

548 if len(runs) != 1: 

549 raise ValueError(f"Multiple run names in input datasets: {runs}") 

550 run = runs.pop() 

551 if run is None: 

552 if self.defaults.run is None: 

553 raise NoDefaultCollectionError( 

554 "No run provided to _importDatasets, and no default from registry construction."

555 ) 

556 run = self.defaults.run 

557 

558 runRecord = self._managers.collections.find(run) 

559 if runRecord.type is not CollectionType.RUN: 

560 raise CollectionTypeError( 

561 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

562 " RUN collection required." 

563 ) 

564 assert isinstance(runRecord, RunRecord) 

565 

566 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

567 if expand: 

568 expandedDatasets = [ 

569 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

570 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

571 ] 

572 else: 

573 expandedDatasets = [ 

574 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

575 for dataset in datasets 

576 ] 

577 

578 try: 

579 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

580 except sqlalchemy.exc.IntegrityError as err: 

581 raise ConflictingDefinitionError( 

582 f"A database constraint failure was triggered by inserting " 

583 f"one or more datasets of type {storage.datasetType} into " 

584 f"collection '{run}'. " 

585 f"This probably means a dataset with the same data ID " 

586 f"and dataset type already exists, but it may also mean a " 

587 f"dimension row is missing." 

588 ) from err 

589 return refs 

590 

591 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

592 # Docstring inherited from lsst.daf.butler.registry.Registry 

593 return self._managers.datasets.getDatasetRef(id) 

594 

595 @transactional 

596 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

597 # Docstring inherited from lsst.daf.butler.registry.Registry 

598 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

599 for datasetType, refsForType in progress.iter_item_chunks( 

600 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

601 ): 

602 storage = self._managers.datasets[datasetType.name] 

603 try: 

604 storage.delete(refsForType) 

605 except sqlalchemy.exc.IntegrityError as err: 

606 raise OrphanedRecordError( 

607 "One or more datasets is still present in one or more Datastores." 

608 ) from err 

609 

610 @transactional 

611 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

612 # Docstring inherited from lsst.daf.butler.registry.Registry 

613 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

614 collectionRecord = self._managers.collections.find(collection) 

615 if collectionRecord.type is not CollectionType.TAGGED: 

616 raise CollectionTypeError( 

617 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

618 ) 

619 for datasetType, refsForType in progress.iter_item_chunks( 

620 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

621 ): 

622 storage = self._managers.datasets[datasetType.name] 

623 try: 

624 storage.associate(collectionRecord, refsForType) 

625 except sqlalchemy.exc.IntegrityError as err: 

626 raise ConflictingDefinitionError( 

627 f"Constraint violation while associating dataset of type {datasetType.name} with " 

628 f"collection {collection}. This probably means that one or more datasets with the same " 

629 f"dataset type and data ID already exist in the collection, but it may also indicate " 

630 f"that the datasets do not exist." 

631 ) from err 

632 

633 @transactional 

634 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

635 # Docstring inherited from lsst.daf.butler.registry.Registry 

636 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

637 collectionRecord = self._managers.collections.find(collection) 

638 if collectionRecord.type is not CollectionType.TAGGED: 

639 raise CollectionTypeError( 

640 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

641 ) 

642 for datasetType, refsForType in progress.iter_item_chunks( 

643 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

644 ): 

645 storage = self._managers.datasets[datasetType.name] 

646 storage.disassociate(collectionRecord, refsForType) 

647 

648 @transactional 

649 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

650 # Docstring inherited from lsst.daf.butler.registry.Registry 

651 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

652 collectionRecord = self._managers.collections.find(collection) 

653 for datasetType, refsForType in progress.iter_item_chunks( 

654 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

655 ): 

656 storage = self._managers.datasets[datasetType.name] 

657 storage.certify(collectionRecord, refsForType, timespan) 

658 

659 @transactional 

660 def decertify( 

661 self, 

662 collection: str, 

663 datasetType: Union[str, DatasetType], 

664 timespan: Timespan, 

665 *, 

666 dataIds: Optional[Iterable[DataId]] = None, 

667 ) -> None: 

668 # Docstring inherited from lsst.daf.butler.registry.Registry 

669 collectionRecord = self._managers.collections.find(collection) 

670 if isinstance(datasetType, str): 

671 storage = self._managers.datasets[datasetType] 

672 else: 

673 storage = self._managers.datasets[datasetType.name] 

674 standardizedDataIds = None 

675 if dataIds is not None: 

676 standardizedDataIds = [ 

677 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

678 ] 

679 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

680 
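A sketch of the calibration validity-range workflow; ``bias_refs`` stands in for resolved `DatasetRef` objects obtained elsewhere, and the collection name is invented.

    import astropy.time
    from lsst.daf.butler import Timespan
    from lsst.daf.butler.registry import CollectionType

    registry.registerCollection("HSC/calib/example", CollectionType.CALIBRATION)
    valid = Timespan(
        astropy.time.Time("2022-01-01", scale="tai"),
        astropy.time.Time("2022-06-01", scale="tai"),
    )
    registry.certify("HSC/calib/example", bias_refs, valid)
    # Later, retract those associations over the same range for every data ID:
    registry.decertify("HSC/calib/example", "bias", valid)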

681 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

682 """Return an object that allows a new `Datastore` instance to 

683 communicate with this `Registry`. 

684 

685 Returns 

686 ------- 

687 manager : `DatastoreRegistryBridgeManager` 

688 Object that mediates communication between this `Registry` and its 

689 associated datastores. 

690 """ 

691 return self._managers.datastores 

692 

693 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

694 # Docstring inherited from lsst.daf.butler.registry.Registry 

695 return self._managers.datastores.findDatastores(ref) 

696 

697 def expandDataId( 

698 self, 

699 dataId: Optional[DataId] = None, 

700 *, 

701 graph: Optional[DimensionGraph] = None, 

702 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

703 withDefaults: bool = True, 

704 **kwargs: Any, 

705 ) -> DataCoordinate: 

706 # Docstring inherited from lsst.daf.butler.registry.Registry 

707 if not withDefaults: 

708 defaults = None 

709 else: 

710 defaults = self.defaults.dataId 

711 try: 

712 standardized = DataCoordinate.standardize( 

713 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

714 ) 

715 except KeyError as exc: 

716 # This means either kwargs have some odd name or required 

717 # dimension is missing. 

718 raise DimensionNameError(str(exc)) from exc 

719 if standardized.hasRecords(): 

720 return standardized 

721 if records is None: 

722 records = {} 

723 elif isinstance(records, NamedKeyMapping): 

724 records = records.byName() 

725 else: 

726 records = dict(records) 

727 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

728 records.update(dataId.records.byName()) 

729 keys = standardized.byName() 

730 for element in standardized.graph.primaryKeyTraversalOrder: 

731 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

732 if record is ...: 

733 if isinstance(element, Dimension) and keys.get(element.name) is None: 

734 if element in standardized.graph.required: 

735 raise DimensionNameError( 

736 f"No value or null value for required dimension {element.name}." 

737 ) 

738 keys[element.name] = None 

739 record = None 

740 else: 

741 storage = self._managers.dimensions[element] 

742 dataIdSet = DataCoordinateIterable.fromScalar( 

743 DataCoordinate.standardize(keys, graph=element.graph) 

744 ) 

745 fetched = tuple(storage.fetch(dataIdSet)) 

746 try: 

747 (record,) = fetched 

748 except ValueError: 

749 record = None 

750 records[element.name] = record 

751 if record is not None: 

752 for d in element.implied: 

753 value = getattr(record, d.name) 

754 if keys.setdefault(d.name, value) != value: 

755 raise InconsistentDataIdError( 

756 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

757 f"but {element.name} implies {d.name}={value!r}." 

758 ) 

759 else: 

760 if element in standardized.graph.required: 

761 raise DataIdValueError( 

762 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

763 ) 

764 if element.alwaysJoin: 

765 raise InconsistentDataIdError( 

766 f"Could not fetch record for element {element.name} via keys {keys}, "

767 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

768 "related.", 

769 ) 

770 for d in element.implied: 

771 keys.setdefault(d.name, None) 

772 records.setdefault(d.name, None) 

773 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

774 
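A small example of data ID expansion, assuming the instrument and visit records already exist in the registry.

    data_id = registry.expandDataId(instrument="HSC", visit=903334)
    print(data_id.full)               # required plus implied dimension values
    print(data_id.records["visit"])   # the attached visit dimension record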

775 def insertDimensionData( 

776 self, 

777 element: Union[DimensionElement, str], 

778 *data: Union[Mapping[str, Any], DimensionRecord], 

779 conform: bool = True, 

780 replace: bool = False, 

781 skip_existing: bool = False, 

782 ) -> None: 

783 # Docstring inherited from lsst.daf.butler.registry.Registry 

784 if conform: 

785 if isinstance(element, str): 

786 element = self.dimensions[element] 

787 records = [ 

788 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

789 ] 

790 else: 

791 # Ignore typing since caller said to trust them with conform=False. 

792 records = data # type: ignore 

793 storage = self._managers.dimensions[element] # type: ignore 

794 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

795 

796 def syncDimensionData( 

797 self, 

798 element: Union[DimensionElement, str], 

799 row: Union[Mapping[str, Any], DimensionRecord], 

800 conform: bool = True, 

801 update: bool = False, 

802 ) -> Union[bool, Dict[str, Any]]: 

803 # Docstring inherited from lsst.daf.butler.registry.Registry 

804 if conform: 

805 if isinstance(element, str): 

806 element = self.dimensions[element] 

807 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

808 else: 

809 # Ignore typing since caller said to trust them with conform=False. 

810 record = row # type: ignore 

811 storage = self._managers.dimensions[element] # type: ignore 

812 return storage.sync(record, update=update) 

813 
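Illustrative records for the two methods above, using dimension names from the default universe; the instrument and filter are invented.

    registry.insertDimensionData(
        "instrument", {"name": "DummyCam", "visit_max": 10000, "detector_max": 4}
    )
    # sync inserts the record only if it is not already present.
    registry.syncDimensionData(
        "physical_filter", {"instrument": "DummyCam", "name": "dummy_r", "band": "r"}
    )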

814 def queryDatasetTypes( 

815 self, 

816 expression: Any = ..., 

817 *, 

818 components: Optional[bool] = None, 

819 missing: Optional[List[str]] = None, 

820 ) -> Iterator[DatasetType]: 

821 # Docstring inherited from lsst.daf.butler.registry.Registry 

822 try: 

823 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

824 except TypeError as exc: 

825 raise DatasetTypeExpressionError(f"Invalid dataset type expression '{expression}'") from exc 

826 unknownComponentsMessage = ( 

827 "Could not find definition for storage class %s for dataset type %r;" 

828 " if it has components they will not be included in dataset type query results." 

829 ) 

830 if wildcard is Ellipsis: 

831 for datasetType in self._managers.datasets: 

832 # The dataset type can no longer be a component 

833 yield datasetType 

834 if components: 

835 # Automatically create the component dataset types 

836 try: 

837 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

838 except KeyError as err: 

839 _LOG.warning(unknownComponentsMessage, err, datasetType.name) 

840 else: 

841 yield from componentsForDatasetType 

842 return 

843 done: Set[str] = set() 

844 for name in wildcard.strings: 

845 storage = self._managers.datasets.find(name) 

846 done.add(name) 

847 if storage is None: 

848 if missing is not None: 

849 missing.append(name) 

850 else: 

851 yield storage.datasetType 

852 if wildcard.patterns: 

853 # If components (the argument) is None, we'll save component 

854 # dataset types that we might want to match, but only if their parents

855 # didn't get included. 

856 componentsForLater = [] 

857 for registeredDatasetType in self._managers.datasets: 

858 # Components are not stored in registry so expand them here 

859 allDatasetTypes = [registeredDatasetType] 

860 if components is not False: 

861 # Only check for the components if we are being asked 

862 # for components or components is None. 

863 try: 

864 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

865 except KeyError as err: 

866 _LOG.warning(unknownComponentsMessage, err, registeredDatasetType.name) 

867 for datasetType in allDatasetTypes: 

868 if datasetType.name in done: 

869 continue 

870 parentName, componentName = datasetType.nameAndComponent() 

871 if componentName is not None and not components: 

872 if components is None and parentName not in done: 

873 componentsForLater.append(datasetType) 

874 continue 

875 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

876 done.add(datasetType.name) 

877 yield datasetType 

878 # Go back and try to match saved components. 

879 for datasetType in componentsForLater: 

880 parentName, _ = datasetType.nameAndComponent() 

881 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

882 yield datasetType 

883 
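An example of matching registered dataset types with a regular expression and expanding their components; the pattern is illustrative.

    import re

    for dt in registry.queryDatasetTypes(re.compile(r"deepCoadd.*"), components=True):
        print(dt.name)   # parents and components such as "deepCoadd.psf"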

884 def queryCollections( 

885 self, 

886 expression: Any = ..., 

887 datasetType: Optional[DatasetType] = None, 

888 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

889 flattenChains: bool = False, 

890 includeChains: Optional[bool] = None, 

891 ) -> Iterator[str]: 

892 # Docstring inherited from lsst.daf.butler.registry.Registry 

893 

894 # Right now the datasetType argument is completely ignored, but that

895 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

896 # ticket will take care of that. 

897 try: 

898 query = CollectionQuery.fromExpression(expression) 

899 except TypeError as exc: 

900 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

901 collectionTypes = ensure_iterable(collectionTypes) 

902 for record in query.iter( 

903 self._managers.collections, 

904 collectionTypes=frozenset(collectionTypes), 

905 flattenChains=flattenChains, 

906 includeChains=includeChains, 

907 ): 

908 yield record.name 

909 

910 def _makeQueryBuilder( 

911 self, summary: queries.QuerySummary, doomed_by: Iterable[str] = () 

912 ) -> queries.QueryBuilder: 

913 """Return a `QueryBuilder` instance capable of constructing and 

914 managing more complex queries than those obtainable via `Registry` 

915 interfaces. 

916 

917 This is an advanced interface; downstream code should prefer 

918 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

919 are sufficient. 

920 

921 Parameters 

922 ---------- 

923 summary : `queries.QuerySummary` 

924 Object describing and categorizing the full set of dimensions that 

925 will be included in the query. 

926 doomed_by : `Iterable` of `str`, optional 

927 A list of diagnostic messages that indicate why the query is going 

928 to yield no results and should not even be executed. If an empty 

929 container (default) the query will be executed unless other code 

930 determines that it is doomed. 

931 

932 Returns 

933 ------- 

934 builder : `queries.QueryBuilder` 

935 Object that can be used to construct and perform advanced queries. 

936 """ 

937 return queries.QueryBuilder( 

938 summary, 

939 backend=queries.SqlQueryBackend(self._db, self._managers), 

940 doomed_by=doomed_by, 

941 ) 

942 

943 def _standardize_query_dataset_args( 

944 self, 

945 datasets: Any, 

946 collections: Any, 

947 components: bool | None, 

948 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

949 *, 

950 doomed_by: list[str], 

951 ) -> tuple[defaultdict[DatasetType, list[str | None]], CollectionQuery | CollectionSearch | None]: 

952 """Preprocess dataset arguments passed to query* methods. 

953 

954 Parameters 

955 ---------- 

956 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

957 Expression identifying dataset types. See `queryDatasetTypes` for 

958 details. 

959 collections : `str`, `re.Pattern`, or iterable of these 

960 Expression identifying collections to be searched. See 

961 `queryCollections` for details. 

962 components : `bool`, optional 

963 If `True`, apply all expression patterns to component dataset type 

964 names as well. If `False`, never apply patterns to components. 

965 If `None` (default), apply patterns to components only if their 

966 parent datasets were not matched by the expression. 

967 Fully-specified component datasets (`str` or `DatasetType` 

968 instances) are always included. 

969 mode : `str`, optional 

970 The way in which datasets are being used in this query; one of: 

971 

972 - "find_first": this is a query for the first dataset in an 

973 ordered list of collections. Prohibits collection wildcards, 

974 but permits dataset type wildcards. 

975 

976 - "find_all": this is a query for all datasets in all matched 

977 collections. Permits collection and dataset type wildcards. 

978 

979 - "constrain": this is a query for something other than datasets, 

980 with results constrained by dataset existence. Permits 

981 collection wildcards and prohibits ``...`` as a dataset type 

982 wildcard. 

983 doomed_by : `list` [ `str` ] 

984 List to append messages indicating why the query is doomed to 

985 yield no results. 

986 

987 Returns 

988 ------- 

989 composition : `defaultdict` [ `DatasetType`, `list` [ `str` or `None` ] ]

990 Dictionary mapping parent dataset type to `list` of components 

991 matched for that dataset type (or `None` for the parent itself). 

992 collections : `CollectionSearch` or `CollectionQuery` 

993 Processed collection expression. 

994 """ 

995 composition: defaultdict[DatasetType, list[str | None]] = defaultdict(list) 

996 if datasets is not None: 

997 if not collections: 

998 if not self.defaults.collections: 

999 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1000 collections = self.defaults.collections 

1001 elif mode == "find_first": 

1002 collections = CollectionSearch.fromExpression(collections) 

1003 else: 

1004 collections = CollectionQuery.fromExpression(collections) 

1005 missing: list[str] = [] 

1006 if mode == "constrain" and datasets is Ellipsis: 

1007 raise TypeError("Cannot pass the universal wildcard '...' for dataset types in this context.") 

1008 for dataset_type in self.queryDatasetTypes(datasets, components=components, missing=missing): 

1009 if dataset_type.isComponent(): 

1010 composition[dataset_type.makeCompositeDatasetType()].append(dataset_type.component()) 

1011 else: 

1012 composition[dataset_type].append(None) 

1013 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1014 elif collections: 

1015 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1016 return composition, collections 

1017 

1018 def queryDatasets( 

1019 self, 

1020 datasetType: Any, 

1021 *, 

1022 collections: Any = None, 

1023 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1024 dataId: Optional[DataId] = None, 

1025 where: Optional[str] = None, 

1026 findFirst: bool = False, 

1027 components: Optional[bool] = None, 

1028 bind: Optional[Mapping[str, Any]] = None, 

1029 check: bool = True, 

1030 **kwargs: Any, 

1031 ) -> queries.DatasetQueryResults: 

1032 # Docstring inherited from lsst.daf.butler.registry.Registry 

1033 doomed_by: list[str] = [] 

1034 data_id = self.expandDataId(dataId, **kwargs) 

1035 dataset_composition, collections = self._standardize_query_dataset_args( 

1036 datasetType, 

1037 collections, 

1038 components, 

1039 mode="find_first" if findFirst else "find_all", 

1040 doomed_by=doomed_by, 

1041 ) 

1042 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1043 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1044 # The full set of dimensions in the query is the combination of 

1045 # those needed for the DatasetType and those explicitly requested, 

1046 # if any. 

1047 dimension_names = set(parent_dataset_type.dimensions.names) 

1048 if dimensions is not None: 

1049 dimension_names.update(self.dimensions.extract(dimensions).names) 

1050 # Construct the summary structure needed to construct a 

1051 # QueryBuilder. 

1052 summary = queries.QuerySummary( 

1053 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1054 dataId=data_id, 

1055 expression=where, 

1056 bind=bind, 

1057 defaults=self.defaults.dataId, 

1058 check=check, 

1059 datasets=[parent_dataset_type], 

1060 ) 

1061 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1062 # Add the dataset subquery to the query, telling the QueryBuilder 

1063 # to include the rank of the selected collection in the results 

1064 # only if we need to findFirst. Note that if any of the 

1065 # collections are actually wildcard expressions, and 

1066 # findFirst=True, this will raise TypeError for us. 

1067 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1068 query = builder.finish() 

1069 parent_results.append( 

1070 queries.ParentDatasetQueryResults( 

1071 self._db, query, datasetType=parent_dataset_type, components=components_for_parent 

1072 ) 

1073 ) 

1074 if not parent_results: 

1075 doomed_by.extend( 

1076 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1077 "exist in any collection." 

1078 for t in ensure_iterable(datasetType) 

1079 ) 

1080 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1081 elif len(parent_results) == 1: 

1082 return parent_results[0] 

1083 else: 

1084 return queries.ChainedDatasetQueryResults(parent_results) 

1085 
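A find-first dataset query with a user expression; dataset type, collection, and data ID values are placeholders.

    refs = registry.queryDatasets(
        "calexp",
        collections=["HSC/runs/example"],
        where="instrument = 'HSC' AND visit = 903334",
        findFirst=True,
    )
    for ref in refs:
        print(ref.dataId, ref.run)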

1086 def queryDataIds( 

1087 self, 

1088 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1089 *, 

1090 dataId: Optional[DataId] = None, 

1091 datasets: Any = None, 

1092 collections: Any = None, 

1093 where: Optional[str] = None, 

1094 components: Optional[bool] = None, 

1095 bind: Optional[Mapping[str, Any]] = None, 

1096 check: bool = True, 

1097 **kwargs: Any, 

1098 ) -> queries.DataCoordinateQueryResults: 

1099 # Docstring inherited from lsst.daf.butler.registry.Registry 

1100 dimensions = ensure_iterable(dimensions) 

1101 requestedDimensions = self.dimensions.extract(dimensions) 

1102 doomed_by: list[str] = [] 

1103 data_id = self.expandDataId(dataId, **kwargs) 

1104 dataset_composition, collections = self._standardize_query_dataset_args( 

1105 datasets, collections, components, doomed_by=doomed_by 

1106 ) 

1107 

1108 def query_factory( 

1109 order_by: Optional[Iterable[str]] = None, limit: Optional[Tuple[int, Optional[int]]] = None 

1110 ) -> queries.Query: 

1111 """Construct the Query object that generates query results.""" 

1112 summary = queries.QuerySummary( 

1113 requested=requestedDimensions, 

1114 dataId=data_id, 

1115 expression=where, 

1116 bind=bind, 

1117 defaults=self.defaults.dataId, 

1118 check=check, 

1119 datasets=dataset_composition.keys(), 

1120 order_by=order_by, 

1121 limit=limit, 

1122 ) 

1123 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1124 for datasetType in dataset_composition: 

1125 builder.joinDataset(datasetType, collections, isResult=False) 

1126 return builder.finish() 

1127 

1128 return queries.DataCoordinateQueryResults(self._db, query_factory, requestedDimensions) 

1129 
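A data ID query constrained by dataset existence, with results expanded to include dimension records; names are placeholders.

    data_ids = registry.queryDataIds(
        ["exposure", "detector"],
        datasets="raw",
        collections="HSC/raw/all",
        instrument="HSC",
    )
    for data_id in data_ids.expanded():
        print(data_id["exposure"], data_id["detector"])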

1130 def queryDimensionRecords( 

1131 self, 

1132 element: Union[DimensionElement, str], 

1133 *, 

1134 dataId: Optional[DataId] = None, 

1135 datasets: Any = None, 

1136 collections: Any = None, 

1137 where: Optional[str] = None, 

1138 components: Optional[bool] = None, 

1139 bind: Optional[Mapping[str, Any]] = None, 

1140 check: bool = True, 

1141 **kwargs: Any, 

1142 ) -> queries.DimensionRecordQueryResults: 

1143 # Docstring inherited from lsst.daf.butler.registry.Registry 

1144 if not isinstance(element, DimensionElement): 

1145 try: 

1146 element = self.dimensions[element] 

1147 except KeyError as e: 

1148 raise DimensionNameError( 

1149 f"No such dimension '{element}', available dimensions: " 

1150 + str(self.dimensions.getStaticElements()) 

1151 ) from e 

1152 dataIds = self.queryDataIds( 

1153 element.graph, 

1154 dataId=dataId, 

1155 datasets=datasets, 

1156 collections=collections, 

1157 where=where, 

1158 components=components, 

1159 bind=bind, 

1160 check=check, 

1161 **kwargs, 

1162 ) 

1163 return queries.DatabaseDimensionRecordQueryResults(dataIds, self._managers.dimensions[element]) 

1164 

1165 def queryDatasetAssociations( 

1166 self, 

1167 datasetType: Union[str, DatasetType], 

1168 collections: Any = ..., 

1169 *, 

1170 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1171 flattenChains: bool = False, 

1172 ) -> Iterator[DatasetAssociation]: 

1173 # Docstring inherited from lsst.daf.butler.registry.Registry 

1174 if collections is None: 

1175 if not self.defaults.collections: 

1176 raise NoDefaultCollectionError( 

1177 "No collections provided to queryDatasetAssociations, and no defaults from registry construction."

1178 ) 

1179 collections = self.defaults.collections 

1180 else: 

1181 collections = CollectionQuery.fromExpression(collections) 

1182 TimespanReprClass = self._db.getTimespanRepresentation() 

1183 if isinstance(datasetType, str): 

1184 storage = self._managers.datasets[datasetType] 

1185 else: 

1186 storage = self._managers.datasets[datasetType.name] 

1187 for collectionRecord in collections.iter( 

1188 self._managers.collections, 

1189 collectionTypes=frozenset(collectionTypes), 

1190 flattenChains=flattenChains, 

1191 ): 

1192 query = storage.select(collectionRecord) 

1193 for row in self._db.query(query).mappings(): 

1194 dataId = DataCoordinate.fromRequiredValues( 

1195 storage.datasetType.dimensions, 

1196 tuple(row[name] for name in storage.datasetType.dimensions.required.names), 

1197 ) 

1198 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1199 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, conform=False) 

1200 if collectionRecord.type is CollectionType.CALIBRATION: 

1201 timespan = TimespanReprClass.extract(row) 

1202 else: 

1203 timespan = None 

1204 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1205 

1206 storageClasses: StorageClassFactory 

1207 """All storage classes known to the registry (`StorageClassFactory`). 

1208 """