Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 TYPE_CHECKING, 

41 Union, 

42) 

43 

44import sqlalchemy 

45 

46from ..core import ( 

47 ButlerURI, 

48 Config, 

49 DataCoordinate, 

50 DataCoordinateIterable, 

51 DataId, 

52 DatasetAssociation, 

53 DatasetRef, 

54 DatasetType, 

55 ddl, 

56 Dimension, 

57 DimensionConfig, 

58 DimensionElement, 

59 DimensionGraph, 

60 DimensionRecord, 

61 DimensionUniverse, 

62 NamedKeyMapping, 

63 NameLookupMapping, 

64 Progress, 

65 StorageClassFactory, 

66 Timespan, 

67) 

68from . import queries 

69from ..core.utils import iterable, transactional 

70from ._config import RegistryConfig 

71from ._collectionType import CollectionType 

72from ._defaults import RegistryDefaults 

73from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

74from .managers import RegistryManagerTypes, RegistryManagerInstances 

75from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

76from .summaries import CollectionSummary 

77from .interfaces import ChainedCollectionRecord, RunRecord 

78 

79if TYPE_CHECKING: 79 ↛ 80line 79 didn't jump to line 80, because the condition on line 79 was never true

80 from .._butlerConfig import ButlerConfig 

81 from .interfaces import ( 

82 Database, 

83 DatastoreRegistryBridgeManager, 

84 ) 

85 

86 

87_LOG = logging.getLogger(__name__) 

88 

89# key for dimensions configuration in attributes table 

90_DIMENSIONS_ATTR = "config:dimensions.json" 

91 

92 

93class Registry: 

94 """Registry interface. 

95 

96 Parameters 

97 ---------- 

98 database : `Database` 

99 Database instance to store Registry. 

100 defaults : `RegistryDefaults`, optional 

101 Default collection search path and/or output `~CollectionType.RUN` 

102 collection. 

103 attributes : `type` 

104 Manager class implementing `ButlerAttributeManager`. 

105 opaque : `type` 

106 Manager class implementing `OpaqueTableStorageManager`. 

107 dimensions : `type` 

108 Manager class implementing `DimensionRecordStorageManager`. 

109 collections : `type` 

110 Manager class implementing `CollectionManager`. 

111 datasets : `type` 

112 Manager class implementing `DatasetRecordStorageManager`. 

113 datastoreBridges : `type` 

114 Manager class implementing `DatastoreRegistryBridgeManager`. 

115 dimensionConfig : `DimensionConfig`, optional 

116 Dimension universe configuration, only used when ``create`` is True. 

117 writeable : `bool`, optional 

118 If True then Registry will support write operations. 

119 create : `bool`, optional 

120 If True then database schema will be initialized, it must be empty 

121 before instantiating Registry. 

122 """ 

123 

124 defaultConfigFile: Optional[str] = None 

125 """Path to configuration defaults. Accessed within the ``configs`` resource 

126 or relative to a search path. Can be None if no defaults specified. 

127 """ 

128 

129 @classmethod 

130 def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None, 

131 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

132 butlerRoot: Optional[str] = None) -> Registry: 

133 """Create registry database and return `Registry` instance. 

134 

135 This method initializes database contents, database must be empty 

136 prior to calling this method. 

137 

138 Parameters 

139 ---------- 

140 config : `RegistryConfig` or `str`, optional 

141 Registry configuration, if missing then default configuration will 

142 be loaded from registry.yaml. 

143 dimensionConfig : `DimensionConfig` or `str`, optional 

144 Dimensions configuration, if missing then default configuration 

145 will be loaded from dimensions.yaml. 

146 butlerRoot : `str`, optional 

147 Path to the repository root this `Registry` will manage. 

148 

149 Returns 

150 ------- 

151 registry : `Registry` 

152 A new `Registry` instance. 

153 """ 

154 if isinstance(config, str): 

155 config = RegistryConfig(config) 

156 elif config is None: 

157 config = RegistryConfig() 

158 elif not isinstance(config, RegistryConfig): 

159 raise TypeError(f"Incompatible Registry configuration type: {type(config)}") 

160 config.replaceRoot(butlerRoot) 

161 

162 if isinstance(dimensionConfig, str): 

163 dimensionConfig = DimensionConfig(config) 

164 elif dimensionConfig is None: 

165 dimensionConfig = DimensionConfig() 

166 elif not isinstance(dimensionConfig, DimensionConfig): 

167 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

168 

169 DatabaseClass = config.getDatabaseClass() 

170 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

171 namespace=config.get("namespace")) 

172 managerTypes = RegistryManagerTypes.fromConfig(config) 

173 managers = managerTypes.makeRepo(database, dimensionConfig) 

174 return cls(database, RegistryDefaults(), managers) 

175 

176 @classmethod 

177 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], 

178 butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True, 

179 defaults: Optional[RegistryDefaults] = None) -> Registry: 

180 """Create `Registry` subclass instance from `config`. 

181 

182 Registry database must be inbitialized prior to calling this method. 

183 

184 Parameters 

185 ---------- 

186 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

187 Registry configuration 

188 butlerRoot : `str` or `ButlerURI`, optional 

189 Path to the repository root this `Registry` will manage. 

190 writeable : `bool`, optional 

191 If `True` (default) create a read-write connection to the database. 

192 defaults : `RegistryDefaults`, optional 

193 Default collection search path and/or output `~CollectionType.RUN` 

194 collection. 

195 

196 Returns 

197 ------- 

198 registry : `Registry` (subclass) 

199 A new `Registry` subclass instance. 

200 """ 

201 if not isinstance(config, RegistryConfig): 

202 if isinstance(config, str) or isinstance(config, Config): 

203 config = RegistryConfig(config) 

204 else: 

205 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

206 config.replaceRoot(butlerRoot) 

207 DatabaseClass = config.getDatabaseClass() 

208 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

209 namespace=config.get("namespace"), writeable=writeable) 

210 managerTypes = RegistryManagerTypes.fromConfig(config) 

211 managers = managerTypes.loadRepo(database) 

212 if defaults is None: 

213 defaults = RegistryDefaults() 

214 return cls(database, defaults, managers) 

215 

216 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

217 self._db = database 

218 self._managers = managers 

219 self.storageClasses = StorageClassFactory() 

220 # Intentionally invoke property setter to initialize defaults. This 

221 # can only be done after most of the rest of Registry has already been 

222 # initialized, and must be done before the property getter is used. 

223 self.defaults = defaults 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"Registry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 """Return `True` if this registry allows write operations, and `False` 

233 otherwise. 

234 """ 

235 return self._db.isWriteable() 

236 

237 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

238 """Create a new `Registry` backed by the same data repository and 

239 connection as this one, but independent defaults. 

240 

241 Parameters 

242 ---------- 

243 defaults : `RegistryDefaults`, optional 

244 Default collections and data ID values for the new registry. If 

245 not provided, ``self.defaults`` will be used (but future changes 

246 to either registry's defaults will not affect the other). 

247 

248 Returns 

249 ------- 

250 copy : `Registry` 

251 A new `Registry` instance with its own defaults. 

252 

253 Notes 

254 ----- 

255 Because the new registry shares a connection with the original, they 

256 also share transaction state (despite the fact that their `transaction` 

257 context manager methods do not reflect this), and must be used with 

258 care. 

259 """ 

260 if defaults is None: 

261 # No need to copy, because `RegistryDefaults` is immutable; we 

262 # effectively copy on write. 

263 defaults = self.defaults 

264 return Registry(self._db, defaults, self._managers) 

265 

266 @property 

267 def dimensions(self) -> DimensionUniverse: 

268 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

269 """ 

270 return self._managers.dimensions.universe 

271 

272 @property 

273 def defaults(self) -> RegistryDefaults: 

274 """Default collection search path and/or output `~CollectionType.RUN` 

275 collection (`RegistryDefaults`). 

276 

277 This is an immutable struct whose components may not be set 

278 individually, but the entire struct can be set by assigning to this 

279 property. 

280 """ 

281 return self._defaults 

282 

283 @defaults.setter 

284 def defaults(self, value: RegistryDefaults) -> None: 

285 if value.run is not None: 

286 self.registerRun(value.run) 

287 value.finish(self) 

288 self._defaults = value 

289 

290 def refresh(self) -> None: 

291 """Refresh all in-memory state by querying the database. 

292 

293 This may be necessary to enable querying for entities added by other 

294 `Registry` instances after this one was constructed. 

295 """ 

296 self._managers.refresh() 

297 

298 @contextlib.contextmanager 

299 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

300 """Return a context manager that represents a transaction. 

301 """ 

302 try: 

303 with self._db.transaction(savepoint=savepoint): 

304 yield 

305 except BaseException: 

306 # TODO: this clears the caches sometimes when we wouldn't actually 

307 # need to. Can we avoid that? 

308 self._managers.dimensions.clearCaches() 

309 raise 

310 

311 def resetConnectionPool(self) -> None: 

312 """Reset SQLAlchemy connection pool for registry database. 

313 

314 This operation is useful when using registry with fork-based 

315 multiprocessing. To use registry across fork boundary one has to make 

316 sure that there are no currently active connections (no session or 

317 transaction is in progress) and connection pool is reset using this 

318 method. This method should be called by the child process immediately 

319 after the fork. 

320 """ 

321 self._db._engine.dispose() 

322 

323 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

324 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

325 other data repository client. 

326 

327 Opaque table records can be added via `insertOpaqueData`, retrieved via 

328 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

329 

330 Parameters 

331 ---------- 

332 tableName : `str` 

333 Logical name of the opaque table. This may differ from the 

334 actual name used in the database by a prefix and/or suffix. 

335 spec : `ddl.TableSpec` 

336 Specification for the table to be added. 

337 """ 

338 self._managers.opaque.register(tableName, spec) 

339 

340 @transactional 

341 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

342 """Insert records into an opaque table. 

343 

344 Parameters 

345 ---------- 

346 tableName : `str` 

347 Logical name of the opaque table. Must match the name used in a 

348 previous call to `registerOpaqueTable`. 

349 data 

350 Each additional positional argument is a dictionary that represents 

351 a single row to be added. 

352 """ 

353 self._managers.opaque[tableName].insert(*data) 

354 

355 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

356 """Retrieve records from an opaque table. 

357 

358 Parameters 

359 ---------- 

360 tableName : `str` 

361 Logical name of the opaque table. Must match the name used in a 

362 previous call to `registerOpaqueTable`. 

363 where 

364 Additional keyword arguments are interpreted as equality 

365 constraints that restrict the returned rows (combined with AND); 

366 keyword arguments are column names and values are the values they 

367 must have. 

368 

369 Yields 

370 ------ 

371 row : `dict` 

372 A dictionary representing a single result row. 

373 """ 

374 yield from self._managers.opaque[tableName].fetch(**where) 

375 

376 @transactional 

377 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

378 """Remove records from an opaque table. 

379 

380 Parameters 

381 ---------- 

382 tableName : `str` 

383 Logical name of the opaque table. Must match the name used in a 

384 previous call to `registerOpaqueTable`. 

385 where 

386 Additional keyword arguments are interpreted as equality 

387 constraints that restrict the deleted rows (combined with AND); 

388 keyword arguments are column names and values are the values they 

389 must have. 

390 """ 

391 self._managers.opaque[tableName].delete(where.keys(), where) 

392 

393 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED, 

394 doc: Optional[str] = None) -> None: 

395 """Add a new collection if one with the given name does not exist. 

396 

397 Parameters 

398 ---------- 

399 name : `str` 

400 The name of the collection to create. 

401 type : `CollectionType` 

402 Enum value indicating the type of collection to create. 

403 doc : `str`, optional 

404 Documentation string for the collection. 

405 

406 Notes 

407 ----- 

408 This method cannot be called within transactions, as it needs to be 

409 able to perform its own transaction to be concurrent. 

410 """ 

411 self._managers.collections.register(name, type, doc=doc) 

412 

413 def getCollectionType(self, name: str) -> CollectionType: 

414 """Return an enumeration value indicating the type of the given 

415 collection. 

416 

417 Parameters 

418 ---------- 

419 name : `str` 

420 The name of the collection. 

421 

422 Returns 

423 ------- 

424 type : `CollectionType` 

425 Enum value indicating the type of this collection. 

426 

427 Raises 

428 ------ 

429 MissingCollectionError 

430 Raised if no collection with the given name exists. 

431 """ 

432 return self._managers.collections.find(name).type 

433 

434 def registerRun(self, name: str, doc: Optional[str] = None) -> None: 

435 """Add a new run if one with the given name does not exist. 

436 

437 Parameters 

438 ---------- 

439 name : `str` 

440 The name of the run to create. 

441 doc : `str`, optional 

442 Documentation string for the collection. 

443 

444 Notes 

445 ----- 

446 This method cannot be called within transactions, as it needs to be 

447 able to perform its own transaction to be concurrent. 

448 """ 

449 self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

450 

451 @transactional 

452 def removeCollection(self, name: str) -> None: 

453 """Completely remove the given collection. 

454 

455 Parameters 

456 ---------- 

457 name : `str` 

458 The name of the collection to remove. 

459 

460 Raises 

461 ------ 

462 MissingCollectionError 

463 Raised if no collection with the given name exists. 

464 

465 Notes 

466 ----- 

467 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

468 in it are also fully removed. This requires that those datasets be 

469 removed (or at least trashed) from any datastores that hold them first. 

470 

471 A collection may not be deleted as long as it is referenced by a 

472 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

473 be deleted or redefined first. 

474 """ 

475 self._managers.collections.remove(name) 

476 

477 def getCollectionChain(self, parent: str) -> CollectionSearch: 

478 """Return the child collections in a `~CollectionType.CHAINED` 

479 collection. 

480 

481 Parameters 

482 ---------- 

483 parent : `str` 

484 Name of the chained collection. Must have already been added via 

485 a call to `Registry.registerCollection`. 

486 

487 Returns 

488 ------- 

489 children : `CollectionSearch` 

490 An object that defines the search path of the collection. 

491 See :ref:`daf_butler_collection_expressions` for more information. 

492 

493 Raises 

494 ------ 

495 MissingCollectionError 

496 Raised if ``parent`` does not exist in the `Registry`. 

497 TypeError 

498 Raised if ``parent`` does not correspond to a 

499 `~CollectionType.CHAINED` collection. 

500 """ 

501 record = self._managers.collections.find(parent) 

502 if record.type is not CollectionType.CHAINED: 

503 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

504 assert isinstance(record, ChainedCollectionRecord) 

505 return record.children 

506 

507 @transactional 

508 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

509 """Define or redefine a `~CollectionType.CHAINED` collection. 

510 

511 Parameters 

512 ---------- 

513 parent : `str` 

514 Name of the chained collection. Must have already been added via 

515 a call to `Registry.registerCollection`. 

516 children : `Any` 

517 An expression defining an ordered search of child collections, 

518 generally an iterable of `str`; see 

519 :ref:`daf_butler_collection_expressions` for more information. 

520 flatten : `bool`, optional 

521 If `True` (`False` is default), recursively flatten out any nested 

522 `~CollectionType.CHAINED` collections in ``children`` first. 

523 

524 Raises 

525 ------ 

526 MissingCollectionError 

527 Raised when any of the given collections do not exist in the 

528 `Registry`. 

529 TypeError 

530 Raised if ``parent`` does not correspond to a 

531 `~CollectionType.CHAINED` collection. 

532 ValueError 

533 Raised if the given collections contains a cycle. 

534 """ 

535 record = self._managers.collections.find(parent) 

536 if record.type is not CollectionType.CHAINED: 

537 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

538 assert isinstance(record, ChainedCollectionRecord) 

539 children = CollectionSearch.fromExpression(children) 

540 if children != record.children or flatten: 

541 record.update(self._managers.collections, children, flatten=flatten) 

542 

543 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

544 """Retrieve the documentation string for a collection. 

545 

546 Parameters 

547 ---------- 

548 name : `str` 

549 Name of the collection. 

550 

551 Returns 

552 ------- 

553 docs : `str` or `None` 

554 Docstring for the collection with the given name. 

555 """ 

556 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

557 

558 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

559 """Set the documentation string for a collection. 

560 

561 Parameters 

562 ---------- 

563 name : `str` 

564 Name of the collection. 

565 docs : `str` or `None` 

566 Docstring for the collection with the given name; will replace any 

567 existing docstring. Passing `None` will remove any existing 

568 docstring. 

569 """ 

570 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

571 

572 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

573 """Return a summary for the given collection. 

574 

575 Parameters 

576 ---------- 

577 collection : `str` 

578 Name of the collection for which a summary is to be retrieved. 

579 

580 Returns 

581 ------- 

582 summary : `CollectionSummary` 

583 Summary of the dataset types and governor dimension values in 

584 this collection. 

585 """ 

586 record = self._managers.collections.find(collection) 

587 return self._managers.datasets.getCollectionSummary(record) 

588 

589 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

590 """ 

591 Add a new `DatasetType` to the Registry. 

592 

593 It is not an error to register the same `DatasetType` twice. 

594 

595 Parameters 

596 ---------- 

597 datasetType : `DatasetType` 

598 The `DatasetType` to be added. 

599 

600 Returns 

601 ------- 

602 inserted : `bool` 

603 `True` if ``datasetType`` was inserted, `False` if an identical 

604 existing `DatsetType` was found. Note that in either case the 

605 DatasetType is guaranteed to be defined in the Registry 

606 consistently with the given definition. 

607 

608 Raises 

609 ------ 

610 ValueError 

611 Raised if the dimensions or storage class are invalid. 

612 ConflictingDefinitionError 

613 Raised if this DatasetType is already registered with a different 

614 definition. 

615 

616 Notes 

617 ----- 

618 This method cannot be called within transactions, as it needs to be 

619 able to perform its own transaction to be concurrent. 

620 """ 

621 _, inserted = self._managers.datasets.register(datasetType) 

622 return inserted 

623 

624 def removeDatasetType(self, name: str) -> None: 

625 """Remove the named `DatasetType` from the registry. 

626 

627 .. warning:: 

628 

629 Registry caches the dataset type definitions. This means that 

630 deleting the dataset type definition may result in unexpected 

631 behavior from other butler processes that are active that have 

632 not seen the deletion. 

633 

634 Parameters 

635 ---------- 

636 name : `str` 

637 Name of the type to be removed. 

638 

639 Raises 

640 ------ 

641 lsst.daf.butler.registry.OrphanedRecordError 

642 Raised if an attempt is made to remove the dataset type definition 

643 when there are already datasets associated with it. 

644 

645 Notes 

646 ----- 

647 If the dataset type is not registered the method will return without 

648 action. 

649 """ 

650 self._managers.datasets.remove(name) 

651 

652 def getDatasetType(self, name: str) -> DatasetType: 

653 """Get the `DatasetType`. 

654 

655 Parameters 

656 ---------- 

657 name : `str` 

658 Name of the type. 

659 

660 Returns 

661 ------- 

662 type : `DatasetType` 

663 The `DatasetType` associated with the given name. 

664 

665 Raises 

666 ------ 

667 KeyError 

668 Requested named DatasetType could not be found in registry. 

669 """ 

670 return self._managers.datasets[name].datasetType 

671 

672 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

673 collections: Any = None, timespan: Optional[Timespan] = None, 

674 **kwargs: Any) -> Optional[DatasetRef]: 

675 """Find a dataset given its `DatasetType` and data ID. 

676 

677 This can be used to obtain a `DatasetRef` that permits the dataset to 

678 be read from a `Datastore`. If the dataset is a component and can not 

679 be found using the provided dataset type, a dataset ref for the parent 

680 will be returned instead but with the correct dataset type. 

681 

682 Parameters 

683 ---------- 

684 datasetType : `DatasetType` or `str` 

685 A `DatasetType` or the name of one. 

686 dataId : `dict` or `DataCoordinate`, optional 

687 A `dict`-like object containing the `Dimension` links that identify 

688 the dataset within a collection. 

689 collections, optional. 

690 An expression that fully or partially identifies the collections to 

691 search for the dataset; see 

692 :ref:`daf_butler_collection_expressions` for more information. 

693 Defaults to ``self.defaults.collections``. 

694 timespan : `Timespan`, optional 

695 A timespan that the validity range of the dataset must overlap. 

696 If not provided, any `~CollectionType.CALIBRATION` collections 

697 matched by the ``collections`` argument will not be searched. 

698 **kwargs 

699 Additional keyword arguments passed to 

700 `DataCoordinate.standardize` to convert ``dataId`` to a true 

701 `DataCoordinate` or augment an existing one. 

702 

703 Returns 

704 ------- 

705 ref : `DatasetRef` 

706 A reference to the dataset, or `None` if no matching Dataset 

707 was found. 

708 

709 Raises 

710 ------ 

711 TypeError 

712 Raised if ``collections`` is `None` and 

713 ``self.defaults.collections`` is `None`. 

714 LookupError 

715 Raised if one or more data ID keys are missing. 

716 KeyError 

717 Raised if the dataset type does not exist. 

718 MissingCollectionError 

719 Raised if any of ``collections`` does not exist in the registry. 

720 

721 Notes 

722 ----- 

723 This method simply returns `None` and does not raise an exception even 

724 when the set of collections searched is intrinsically incompatible with 

725 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

726 only `~CollectionType.CALIBRATION` collections are being searched. 

727 This may make it harder to debug some lookup failures, but the behavior 

728 is intentional; we consider it more important that failed searches are 

729 reported consistently, regardless of the reason, and that adding 

730 additional collections that do not contain a match to the search path 

731 never changes the behavior. 

732 """ 

733 if isinstance(datasetType, DatasetType): 

734 storage = self._managers.datasets[datasetType.name] 

735 else: 

736 storage = self._managers.datasets[datasetType] 

737 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

738 universe=self.dimensions, defaults=self.defaults.dataId, 

739 **kwargs) 

740 if collections is None: 

741 if not self.defaults.collections: 

742 raise TypeError("No collections provided to findDataset, " 

743 "and no defaults from registry construction.") 

744 collections = self.defaults.collections 

745 else: 

746 collections = CollectionSearch.fromExpression(collections) 

747 for collectionRecord in collections.iter(self._managers.collections): 

748 if (collectionRecord.type is CollectionType.CALIBRATION 

749 and (not storage.datasetType.isCalibration() or timespan is None)): 

750 continue 

751 result = storage.find(collectionRecord, dataId, timespan=timespan) 

752 if result is not None: 

753 return result 

754 

755 return None 

756 

757 @transactional 

758 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

759 run: Optional[str] = None) -> List[DatasetRef]: 

760 """Insert one or more datasets into the `Registry` 

761 

762 This always adds new datasets; to associate existing datasets with 

763 a new collection, use ``associate``. 

764 

765 Parameters 

766 ---------- 

767 datasetType : `DatasetType` or `str` 

768 A `DatasetType` or the name of one. 

769 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

770 Dimension-based identifiers for the new datasets. 

771 run : `str`, optional 

772 The name of the run that produced the datasets. Defaults to 

773 ``self.defaults.run``. 

774 

775 Returns 

776 ------- 

777 refs : `list` of `DatasetRef` 

778 Resolved `DatasetRef` instances for all given data IDs (in the same 

779 order). 

780 

781 Raises 

782 ------ 

783 TypeError 

784 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

785 ConflictingDefinitionError 

786 If a dataset with the same dataset type and data ID as one of those 

787 given already exists in ``run``. 

788 MissingCollectionError 

789 Raised if ``run`` does not exist in the registry. 

790 """ 

791 if isinstance(datasetType, DatasetType): 

792 storage = self._managers.datasets.find(datasetType.name) 

793 if storage is None: 

794 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

795 else: 

796 storage = self._managers.datasets.find(datasetType) 

797 if storage is None: 

798 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

799 if run is None: 

800 if self.defaults.run is None: 

801 raise TypeError("No run provided to insertDatasets, " 

802 "and no default from registry construction.") 

803 run = self.defaults.run 

804 runRecord = self._managers.collections.find(run) 

805 if runRecord.type is not CollectionType.RUN: 

806 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.") 

807 assert isinstance(runRecord, RunRecord) 

808 progress = Progress("lsst.daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

809 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

810 for dataId in progress.wrap(dataIds, 

811 f"Expanding {storage.datasetType.name} data IDs")] 

812 try: 

813 refs = list(storage.insert(runRecord, expandedDataIds)) 

814 except sqlalchemy.exc.IntegrityError as err: 

815 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

816 f"one or more datasets of type {storage.datasetType} into " 

817 f"collection '{run}'. " 

818 f"This probably means a dataset with the same data ID " 

819 f"and dataset type already exists, but it may also mean a " 

820 f"dimension row is missing.") from err 

821 return refs 

822 

823 def getDataset(self, id: int) -> Optional[DatasetRef]: 

824 """Retrieve a Dataset entry. 

825 

826 Parameters 

827 ---------- 

828 id : `int` 

829 The unique identifier for the dataset. 

830 

831 Returns 

832 ------- 

833 ref : `DatasetRef` or `None` 

834 A ref to the Dataset, or `None` if no matching Dataset 

835 was found. 

836 """ 

837 ref = self._managers.datasets.getDatasetRef(id) 

838 if ref is None: 

839 return None 

840 return ref 

841 

842 @transactional 

843 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

844 """Remove datasets from the Registry. 

845 

846 The datasets will be removed unconditionally from all collections, and 

847 any `Quantum` that consumed this dataset will instead be marked with 

848 having a NULL input. `Datastore` records will *not* be deleted; the 

849 caller is responsible for ensuring that the dataset has already been 

850 removed from all Datastores. 

851 

852 Parameters 

853 ---------- 

854 refs : `Iterable` of `DatasetRef` 

855 References to the datasets to be removed. Must include a valid 

856 ``id`` attribute, and should be considered invalidated upon return. 

857 

858 Raises 

859 ------ 

860 AmbiguousDatasetError 

861 Raised if any ``ref.id`` is `None`. 

862 OrphanedRecordError 

863 Raised if any dataset is still present in any `Datastore`. 

864 """ 

865 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

866 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

867 desc="Removing datasets by type"): 

868 storage = self._managers.datasets.find(datasetType.name) 

869 assert storage is not None 

870 try: 

871 storage.delete(refsForType) 

872 except sqlalchemy.exc.IntegrityError as err: 

873 raise OrphanedRecordError("One or more datasets is still " 

874 "present in one or more Datastores.") from err 

875 

876 @transactional 

877 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

878 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

879 

880 If a DatasetRef with the same exact integer ID is already in a 

881 collection nothing is changed. If a `DatasetRef` with the same 

882 `DatasetType` and data ID but with different integer ID 

883 exists in the collection, `ConflictingDefinitionError` is raised. 

884 

885 Parameters 

886 ---------- 

887 collection : `str` 

888 Indicates the collection the datasets should be associated with. 

889 refs : `Iterable` [ `DatasetRef` ] 

890 An iterable of resolved `DatasetRef` instances that already exist 

891 in this `Registry`. 

892 

893 Raises 

894 ------ 

895 ConflictingDefinitionError 

896 If a Dataset with the given `DatasetRef` already exists in the 

897 given collection. 

898 AmbiguousDatasetError 

899 Raised if ``any(ref.id is None for ref in refs)``. 

900 MissingCollectionError 

901 Raised if ``collection`` does not exist in the registry. 

902 TypeError 

903 Raise adding new datasets to the given ``collection`` is not 

904 allowed. 

905 """ 

906 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

907 collectionRecord = self._managers.collections.find(collection) 

908 if collectionRecord.type is not CollectionType.TAGGED: 

909 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

910 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

911 desc="Associating datasets by type"): 

912 storage = self._managers.datasets.find(datasetType.name) 

913 assert storage is not None 

914 try: 

915 storage.associate(collectionRecord, refsForType) 

916 except sqlalchemy.exc.IntegrityError as err: 

917 raise ConflictingDefinitionError( 

918 f"Constraint violation while associating dataset of type {datasetType.name} with " 

919 f"collection {collection}. This probably means that one or more datasets with the same " 

920 f"dataset type and data ID already exist in the collection, but it may also indicate " 

921 f"that the datasets do not exist." 

922 ) from err 

923 

924 @transactional 

925 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

926 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

927 

928 ``collection`` and ``ref`` combinations that are not currently 

929 associated are silently ignored. 

930 

931 Parameters 

932 ---------- 

933 collection : `str` 

934 The collection the datasets should no longer be associated with. 

935 refs : `Iterable` [ `DatasetRef` ] 

936 An iterable of resolved `DatasetRef` instances that already exist 

937 in this `Registry`. 

938 

939 Raises 

940 ------ 

941 AmbiguousDatasetError 

942 Raised if any of the given dataset references is unresolved. 

943 MissingCollectionError 

944 Raised if ``collection`` does not exist in the registry. 

945 TypeError 

946 Raise adding new datasets to the given ``collection`` is not 

947 allowed. 

948 """ 

949 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

950 collectionRecord = self._managers.collections.find(collection) 

951 if collectionRecord.type is not CollectionType.TAGGED: 

952 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

953 "expected TAGGED.") 

954 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

955 desc="Disassociating datasets by type"): 

956 storage = self._managers.datasets.find(datasetType.name) 

957 assert storage is not None 

958 storage.disassociate(collectionRecord, refsForType) 

959 

960 @transactional 

961 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

962 """Associate one or more datasets with a calibration collection and a 

963 validity range within it. 

964 

965 Parameters 

966 ---------- 

967 collection : `str` 

968 The name of an already-registered `~CollectionType.CALIBRATION` 

969 collection. 

970 refs : `Iterable` [ `DatasetRef` ] 

971 Datasets to be associated. 

972 timespan : `Timespan` 

973 The validity range for these datasets within the collection. 

974 

975 Raises 

976 ------ 

977 AmbiguousDatasetError 

978 Raised if any of the given `DatasetRef` instances is unresolved. 

979 ConflictingDefinitionError 

980 Raised if the collection already contains a different dataset with 

981 the same `DatasetType` and data ID and an overlapping validity 

982 range. 

983 TypeError 

984 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

985 collection or if one or more datasets are of a dataset type for 

986 which `DatasetType.isCalibration` returns `False`. 

987 """ 

988 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

989 collectionRecord = self._managers.collections.find(collection) 

990 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

991 desc="Certifying datasets by type"): 

992 storage = self._managers.datasets[datasetType.name] 

993 storage.certify(collectionRecord, refsForType, timespan) 

994 

995 @transactional 

996 def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *, 

997 dataIds: Optional[Iterable[DataId]] = None) -> None: 

998 """Remove or adjust datasets to clear a validity range within a 

999 calibration collection. 

1000 

1001 Parameters 

1002 ---------- 

1003 collection : `str` 

1004 The name of an already-registered `~CollectionType.CALIBRATION` 

1005 collection. 

1006 datasetType : `str` or `DatasetType` 

1007 Name or `DatasetType` instance for the datasets to be decertified. 

1008 timespan : `Timespan`, optional 

1009 The validity range to remove datasets from within the collection. 

1010 Datasets that overlap this range but are not contained by it will 

1011 have their validity ranges adjusted to not overlap it, which may 

1012 split a single dataset validity range into two. 

1013 dataIds : `Iterable` [ `DataId` ], optional 

1014 Data IDs that should be decertified within the given validity range 

1015 If `None`, all data IDs for ``self.datasetType`` will be 

1016 decertified. 

1017 

1018 Raises 

1019 ------ 

1020 TypeError 

1021 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1022 collection or if ``datasetType.isCalibration() is False``. 

1023 """ 

1024 collectionRecord = self._managers.collections.find(collection) 

1025 if isinstance(datasetType, str): 

1026 storage = self._managers.datasets[datasetType] 

1027 else: 

1028 storage = self._managers.datasets[datasetType.name] 

1029 standardizedDataIds = None 

1030 if dataIds is not None: 

1031 standardizedDataIds = [DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) 

1032 for d in dataIds] 

1033 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

1034 

1035 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1036 """Return an object that allows a new `Datastore` instance to 

1037 communicate with this `Registry`. 

1038 

1039 Returns 

1040 ------- 

1041 manager : `DatastoreRegistryBridgeManager` 

1042 Object that mediates communication between this `Registry` and its 

1043 associated datastores. 

1044 """ 

1045 return self._managers.datastores 

1046 

1047 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1048 """Retrieve datastore locations for a given dataset. 

1049 

1050 Parameters 

1051 ---------- 

1052 ref : `DatasetRef` 

1053 A reference to the dataset for which to retrieve storage 

1054 information. 

1055 

1056 Returns 

1057 ------- 

1058 datastores : `Iterable` [ `str` ] 

1059 All the matching datastores holding this dataset. 

1060 

1061 Raises 

1062 ------ 

1063 AmbiguousDatasetError 

1064 Raised if ``ref.id`` is `None`. 

1065 """ 

1066 return self._managers.datastores.findDatastores(ref) 

1067 

1068 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

1069 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

1070 withDefaults: bool = True, 

1071 **kwargs: Any) -> DataCoordinate: 

1072 """Expand a dimension-based data ID to include additional information. 

1073 

1074 Parameters 

1075 ---------- 

1076 dataId : `DataCoordinate` or `dict`, optional 

1077 Data ID to be expanded; augmented and overridden by ``kwds``. 

1078 graph : `DimensionGraph`, optional 

1079 Set of dimensions for the expanded ID. If `None`, the dimensions 

1080 will be inferred from the keys of ``dataId`` and ``kwds``. 

1081 Dimensions that are in ``dataId`` or ``kwds`` but not in ``graph`` 

1082 are silently ignored, providing a way to extract and expand a 

1083 subset of a data ID. 

1084 records : `Mapping` [`str`, `DimensionRecord`], optional 

1085 Dimension record data to use before querying the database for that 

1086 data, keyed by element name. 

1087 withDefaults : `bool`, optional 

1088 Utilize ``self.defaults.dataId`` to fill in missing governor 

1089 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1090 used). 

1091 **kwargs 

1092 Additional keywords are treated like additional key-value pairs for 

1093 ``dataId``, extending and overriding 

1094 

1095 Returns 

1096 ------- 

1097 expanded : `DataCoordinate` 

1098 A data ID that includes full metadata for all of the dimensions it 

1099 identifieds, i.e. guarantees that ``expanded.hasRecords()`` and 

1100 ``expanded.hasFull()`` both return `True`. 

1101 """ 

1102 if not withDefaults: 

1103 defaults = None 

1104 else: 

1105 defaults = self.defaults.dataId 

1106 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, 

1107 defaults=defaults, **kwargs) 

1108 if standardized.hasRecords(): 

1109 return standardized 

1110 if records is None: 

1111 records = {} 

1112 elif isinstance(records, NamedKeyMapping): 

1113 records = records.byName() 

1114 else: 

1115 records = dict(records) 

1116 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1117 records.update(dataId.records.byName()) 

1118 keys = standardized.byName() 

1119 for element in standardized.graph.primaryKeyTraversalOrder: 

1120 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

1121 if record is ...: 

1122 if isinstance(element, Dimension) and keys.get(element.name) is None: 

1123 if element in standardized.graph.required: 

1124 raise LookupError( 

1125 f"No value or null value for required dimension {element.name}." 

1126 ) 

1127 keys[element.name] = None 

1128 record = None 

1129 else: 

1130 storage = self._managers.dimensions[element] 

1131 dataIdSet = DataCoordinateIterable.fromScalar( 

1132 DataCoordinate.standardize(keys, graph=element.graph) 

1133 ) 

1134 fetched = tuple(storage.fetch(dataIdSet)) 

1135 try: 

1136 (record,) = fetched 

1137 except ValueError: 

1138 record = None 

1139 records[element.name] = record 

1140 if record is not None: 

1141 for d in element.implied: 

1142 value = getattr(record, d.name) 

1143 if keys.setdefault(d.name, value) != value: 

1144 raise InconsistentDataIdError( 

1145 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1146 f"but {element.name} implies {d.name}={value!r}." 

1147 ) 

1148 else: 

1149 if element in standardized.graph.required: 

1150 raise LookupError( 

1151 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1152 ) 

1153 if element.alwaysJoin: 

1154 raise InconsistentDataIdError( 

1155 f"Could not fetch record for element {element.name} via keys {keys}, ", 

1156 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

1157 "related." 

1158 ) 

1159 for d in element.implied: 

1160 keys.setdefault(d.name, None) 

1161 records.setdefault(d.name, None) 

1162 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

1163 

1164 def insertDimensionData(self, element: Union[DimensionElement, str], 

1165 *data: Union[Mapping[str, Any], DimensionRecord], 

1166 conform: bool = True) -> None: 

1167 """Insert one or more dimension records into the database. 

1168 

1169 Parameters 

1170 ---------- 

1171 element : `DimensionElement` or `str` 

1172 The `DimensionElement` or name thereof that identifies the table 

1173 records will be inserted into. 

1174 data : `dict` or `DimensionRecord` (variadic) 

1175 One or more records to insert. 

1176 conform : `bool`, optional 

1177 If `False` (`True` is default) perform no checking or conversions, 

1178 and assume that ``element`` is a `DimensionElement` instance and 

1179 ``data`` is a one or more `DimensionRecord` instances of the 

1180 appropriate subclass. 

1181 """ 

1182 if conform: 

1183 if isinstance(element, str): 

1184 element = self.dimensions[element] 

1185 records = [row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1186 for row in data] 

1187 else: 

1188 # Ignore typing since caller said to trust them with conform=False. 

1189 records = data # type: ignore 

1190 storage = self._managers.dimensions[element] # type: ignore 

1191 storage.insert(*records) 

1192 

1193 def syncDimensionData(self, element: Union[DimensionElement, str], 

1194 row: Union[Mapping[str, Any], DimensionRecord], 

1195 conform: bool = True) -> bool: 

1196 """Synchronize the given dimension record with the database, inserting 

1197 if it does not already exist and comparing values if it does. 

1198 

1199 Parameters 

1200 ---------- 

1201 element : `DimensionElement` or `str` 

1202 The `DimensionElement` or name thereof that identifies the table 

1203 records will be inserted into. 

1204 row : `dict` or `DimensionRecord` 

1205 The record to insert. 

1206 conform : `bool`, optional 

1207 If `False` (`True` is default) perform no checking or conversions, 

1208 and assume that ``element`` is a `DimensionElement` instance and 

1209 ``data`` is a one or more `DimensionRecord` instances of the 

1210 appropriate subclass. 

1211 

1212 Returns 

1213 ------- 

1214 inserted : `bool` 

1215 `True` if a new row was inserted, `False` otherwise. 

1216 

1217 Raises 

1218 ------ 

1219 ConflictingDefinitionError 

1220 Raised if the record exists in the database (according to primary 

1221 key lookup) but is inconsistent with the given one. 

1222 """ 

1223 if conform: 

1224 if isinstance(element, str): 

1225 element = self.dimensions[element] 

1226 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1227 else: 

1228 # Ignore typing since caller said to trust them with conform=False. 

1229 record = row # type: ignore 

1230 storage = self._managers.dimensions[element] # type: ignore 

1231 return storage.sync(record) 

1232 

1233 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

1234 ) -> Iterator[DatasetType]: 

1235 """Iterate over the dataset types whose names match an expression. 

1236 

1237 Parameters 

1238 ---------- 

1239 expression : `Any`, optional 

1240 An expression that fully or partially identifies the dataset types 

1241 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1242 `...` can be used to return all dataset types, and is the default. 

1243 See :ref:`daf_butler_dataset_type_expressions` for more 

1244 information. 

1245 components : `bool`, optional 

1246 If `True`, apply all expression patterns to component dataset type 

1247 names as well. If `False`, never apply patterns to components. 

1248 If `None` (default), apply patterns to components only if their 

1249 parent datasets were not matched by the expression. 

1250 Fully-specified component datasets (`str` or `DatasetType` 

1251 instances) are always included. 

1252 

1253 Yields 

1254 ------ 

1255 datasetType : `DatasetType` 

1256 A `DatasetType` instance whose name matches ``expression``. 

1257 """ 

1258 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

1259 if wildcard is Ellipsis: 

1260 for datasetType in self._managers.datasets: 

1261 # The dataset type can no longer be a component 

1262 yield datasetType 

1263 if components: 

1264 # Automatically create the component dataset types 

1265 try: 

1266 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

1267 except KeyError as err: 

1268 _LOG.warning(f"Could not load storage class {err} for {datasetType.name}; " 

1269 "if it has components they will not be included in query results.") 

1270 else: 

1271 yield from componentsForDatasetType 

1272 return 

1273 done: Set[str] = set() 

1274 for name in wildcard.strings: 

1275 storage = self._managers.datasets.find(name) 

1276 if storage is not None: 

1277 done.add(storage.datasetType.name) 

1278 yield storage.datasetType 

1279 if wildcard.patterns: 

1280 # If components (the argument) is None, we'll save component 

1281 # dataset that we might want to match, but only if their parents 

1282 # didn't get included. 

1283 componentsForLater = [] 

1284 for registeredDatasetType in self._managers.datasets: 

1285 # Components are not stored in registry so expand them here 

1286 allDatasetTypes = [registeredDatasetType] 

1287 try: 

1288 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

1289 except KeyError as err: 

1290 _LOG.warning(f"Could not load storage class {err} for {registeredDatasetType.name}; " 

1291 "if it has components they will not be included in query results.") 

1292 for datasetType in allDatasetTypes: 

1293 if datasetType.name in done: 

1294 continue 

1295 parentName, componentName = datasetType.nameAndComponent() 

1296 if componentName is not None and not components: 

1297 if components is None and parentName not in done: 

1298 componentsForLater.append(datasetType) 

1299 continue 

1300 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1301 done.add(datasetType.name) 

1302 yield datasetType 

1303 # Go back and try to match saved components. 

1304 for datasetType in componentsForLater: 

1305 parentName, _ = datasetType.nameAndComponent() 

1306 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1307 yield datasetType 

1308 

1309 def queryCollections(self, expression: Any = ..., 

1310 datasetType: Optional[DatasetType] = None, 

1311 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1312 flattenChains: bool = False, 

1313 includeChains: Optional[bool] = None) -> Iterator[str]: 

1314 """Iterate over the collections whose names match an expression. 

1315 

1316 Parameters 

1317 ---------- 

1318 expression : `Any`, optional 

1319 An expression that fully or partially identifies the collections 

1320 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1321 `...` can be used to return all collections, and is the default. 

1322 See :ref:`daf_butler_collection_expressions` for more 

1323 information. 

1324 datasetType : `DatasetType`, optional 

1325 If provided, only yield collections that may contain datasets of 

1326 this type. This is a conservative approximation in general; it may 

1327 yield collections that do not have any such datasets. 

1328 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1329 If provided, only yield collections of these types. 

1330 flattenChains : `bool`, optional 

1331 If `True` (`False` is default), recursively yield the child 

1332 collections of matching `~CollectionType.CHAINED` collections. 

1333 includeChains : `bool`, optional 

1334 If `True`, yield records for matching `~CollectionType.CHAINED` 

1335 collections. Default is the opposite of ``flattenChains``: include 

1336 either CHAINED collections or their children, but not both. 

1337 

1338 Yields 

1339 ------ 

1340 collection : `str` 

1341 The name of a collection that matches ``expression``. 

1342 """ 

1343 # Right now the datasetTypes argument is completely ignored, but that 

1344 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1345 # ticket will take care of that. 

1346 query = CollectionQuery.fromExpression(expression) 

1347 for record in query.iter(self._managers.collections, collectionTypes=frozenset(collectionTypes), 

1348 flattenChains=flattenChains, includeChains=includeChains): 

1349 yield record.name 

1350 

1351 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder: 

1352 """Return a `QueryBuilder` instance capable of constructing and 

1353 managing more complex queries than those obtainable via `Registry` 

1354 interfaces. 

1355 

1356 This is an advanced interface; downstream code should prefer 

1357 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1358 are sufficient. 

1359 

1360 Parameters 

1361 ---------- 

1362 summary : `queries.QuerySummary` 

1363 Object describing and categorizing the full set of dimensions that 

1364 will be included in the query. 

1365 

1366 Returns 

1367 ------- 

1368 builder : `queries.QueryBuilder` 

1369 Object that can be used to construct and perform advanced queries. 

1370 """ 

1371 return queries.QueryBuilder( 

1372 summary, 

1373 queries.RegistryManagers( 

1374 collections=self._managers.collections, 

1375 dimensions=self._managers.dimensions, 

1376 datasets=self._managers.datasets, 

1377 TimespanReprClass=self._db.getTimespanRepresentation(), 

1378 ), 

1379 ) 

1380 

1381 def queryDatasets(self, datasetType: Any, *, 

1382 collections: Any = None, 

1383 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1384 dataId: Optional[DataId] = None, 

1385 where: Optional[str] = None, 

1386 findFirst: bool = False, 

1387 components: Optional[bool] = None, 

1388 bind: Optional[Mapping[str, Any]] = None, 

1389 check: bool = True, 

1390 **kwargs: Any) -> queries.DatasetQueryResults: 

1391 """Query for and iterate over dataset references matching user-provided 

1392 criteria. 

1393 

1394 Parameters 

1395 ---------- 

1396 datasetType 

1397 An expression that fully or partially identifies the dataset types 

1398 to be queried. Allowed types include `DatasetType`, `str`, 

1399 `re.Pattern`, and iterables thereof. The special value `...` can 

1400 be used to query all dataset types. See 

1401 :ref:`daf_butler_dataset_type_expressions` for more information. 

1402 collections: optional 

1403 An expression that fully or partially identifies the collections 

1404 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1405 thereof. `...` can be used to find datasets from all 

1406 `~CollectionType.RUN` collections (no other collections are 

1407 necessary, because all datasets are in a ``RUN`` collection). See 

1408 :ref:`daf_butler_collection_expressions` for more information. 

1409 If not provided, ``self.default.collections`` is used. 

1410 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1411 Dimensions to include in the query (in addition to those used 

1412 to identify the queried dataset type(s)), either to constrain 

1413 the resulting datasets to those for which a matching dimension 

1414 exists, or to relate the dataset type's dimensions to dimensions 

1415 referenced by the ``dataId`` or ``where`` arguments. 

1416 dataId : `dict` or `DataCoordinate`, optional 

1417 A data ID whose key-value pairs are used as equality constraints 

1418 in the query. 

1419 where : `str`, optional 

1420 A string expression similar to a SQL WHERE clause. May involve 

1421 any column of a dimension table or (as a shortcut for the primary 

1422 key column of a dimension table) dimension name. See 

1423 :ref:`daf_butler_dimension_expressions` for more information. 

1424 findFirst : `bool`, optional 

1425 If `True` (`False` is default), for each result data ID, only 

1426 yield one `DatasetRef` of each `DatasetType`, from the first 

1427 collection in which a dataset of that dataset type appears 

1428 (according to the order of ``collections`` passed in). If `True`, 

1429 ``collections`` must not contain regular expressions and may not 

1430 be `...`. 

1431 components : `bool`, optional 

1432 If `True`, apply all dataset expression patterns to component 

1433 dataset type names as well. If `False`, never apply patterns to 

1434 components. If `None` (default), apply patterns to components only 

1435 if their parent datasets were not matched by the expression. 

1436 Fully-specified component datasets (`str` or `DatasetType` 

1437 instances) are always included. 

1438 bind : `Mapping`, optional 

1439 Mapping containing literal values that should be injected into the 

1440 ``where`` expression, keyed by the identifiers they replace. 

1441 check : `bool`, optional 

1442 If `True` (default) check the query for consistency before 

1443 executing it. This may reject some valid queries that resemble 

1444 common mistakes (e.g. queries for visits without specifying an 

1445 instrument). 

1446 **kwargs 

1447 Additional keyword arguments are forwarded to 

1448 `DataCoordinate.standardize` when processing the ``dataId`` 

1449 argument (and may be used to provide a constraining data ID even 

1450 when the ``dataId`` argument is `None`). 

1451 

1452 Returns 

1453 ------- 

1454 refs : `queries.DatasetQueryResults` 

1455 Dataset references matching the given query criteria. 

1456 

1457 Raises 

1458 ------ 

1459 TypeError 

1460 Raised when the arguments are incompatible, such as when a 

1461 collection wildcard is passed when ``findFirst`` is `True`, or 

1462 when ``collections`` is `None` and``self.defaults.collections`` is 

1463 also `None`. 

1464 

1465 Notes 

1466 ----- 

1467 When multiple dataset types are queried in a single call, the 

1468 results of this operation are equivalent to querying for each dataset 

1469 type separately in turn, and no information about the relationships 

1470 between datasets of different types is included. In contexts where 

1471 that kind of information is important, the recommended pattern is to 

1472 use `queryDataIds` to first obtain data IDs (possibly with the 

1473 desired dataset types and collections passed as constraints to the 

1474 query), and then use multiple (generally much simpler) calls to 

1475 `queryDatasets` with the returned data IDs passed as constraints. 

1476 """ 

1477 # Standardize the collections expression. 

1478 if collections is None: 

1479 if not self.defaults.collections: 

1480 raise TypeError("No collections provided to findDataset, " 

1481 "and no defaults from registry construction.") 

1482 collections = self.defaults.collections 

1483 elif findFirst: 

1484 collections = CollectionSearch.fromExpression(collections) 

1485 else: 

1486 collections = CollectionQuery.fromExpression(collections) 

1487 # Standardize and expand the data ID provided as a constraint. 

1488 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1489 

1490 # We can only query directly if given a non-component DatasetType 

1491 # instance. If we were given an expression or str or a component 

1492 # DatasetType instance, we'll populate this dict, recurse, and return. 

1493 # If we already have a non-component DatasetType, it will remain None 

1494 # and we'll run the query directly. 

1495 composition: Optional[ 

1496 Dict[ 

1497 DatasetType, # parent dataset type 

1498 List[Optional[str]] # component name, or None for parent 

1499 ] 

1500 ] = None 

1501 if not isinstance(datasetType, DatasetType): 

1502 # We were given a dataset type expression (which may be as simple 

1503 # as a str). Loop over all matching datasets, delegating handling 

1504 # of the `components` argument to queryDatasetTypes, as we populate 

1505 # the composition dict. 

1506 composition = defaultdict(list) 

1507 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1508 parentName, componentName = trueDatasetType.nameAndComponent() 

1509 if componentName is not None: 

1510 parentDatasetType = self.getDatasetType(parentName) 

1511 composition.setdefault(parentDatasetType, []).append(componentName) 

1512 else: 

1513 composition.setdefault(trueDatasetType, []).append(None) 

1514 elif datasetType.isComponent(): 

1515 # We were given a true DatasetType instance, but it's a component. 

1516 # the composition dict will have exactly one item. 

1517 parentName, componentName = datasetType.nameAndComponent() 

1518 parentDatasetType = self.getDatasetType(parentName) 

1519 composition = {parentDatasetType: [componentName]} 

1520 if composition is not None: 

1521 # We need to recurse. Do that once for each parent dataset type. 

1522 chain = [] 

1523 for parentDatasetType, componentNames in composition.items(): 

1524 parentResults = self.queryDatasets( 

1525 parentDatasetType, 

1526 collections=collections, 

1527 dimensions=dimensions, 

1528 dataId=standardizedDataId, 

1529 where=where, 

1530 findFirst=findFirst, 

1531 check=check, 

1532 ) 

1533 if isinstance(parentResults, queries.ParentDatasetQueryResults): 

1534 chain.append( 

1535 parentResults.withComponents(componentNames) 

1536 ) 

1537 else: 

1538 # Should only happen if we know there would be no results. 

1539 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \ 

1540 and not parentResults._chain 

1541 return queries.ChainedDatasetQueryResults(chain) 

1542 # If we get here, there's no need to recurse (or we are already 

1543 # recursing; there can only ever be one level of recursion). 

1544 

1545 # The full set of dimensions in the query is the combination of those 

1546 # needed for the DatasetType and those explicitly requested, if any. 

1547 requestedDimensionNames = set(datasetType.dimensions.names) 

1548 if dimensions is not None: 

1549 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1550 # Construct the summary structure needed to construct a QueryBuilder. 

1551 summary = queries.QuerySummary( 

1552 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1553 dataId=standardizedDataId, 

1554 expression=where, 

1555 bind=bind, 

1556 defaults=self.defaults.dataId, 

1557 check=check, 

1558 ) 

1559 builder = self.makeQueryBuilder(summary) 

1560 # Add the dataset subquery to the query, telling the QueryBuilder to 

1561 # include the rank of the selected collection in the results only if we 

1562 # need to findFirst. Note that if any of the collections are 

1563 # actually wildcard expressions, and we've asked for deduplication, 

1564 # this will raise TypeError for us. 

1565 if not builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst): 

1566 return queries.ChainedDatasetQueryResults(()) 

1567 query = builder.finish() 

1568 return queries.ParentDatasetQueryResults(self._db, query, components=[None]) 

1569 

1570 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1571 dataId: Optional[DataId] = None, 

1572 datasets: Any = None, 

1573 collections: Any = None, 

1574 where: Optional[str] = None, 

1575 components: Optional[bool] = None, 

1576 bind: Optional[Mapping[str, Any]] = None, 

1577 check: bool = True, 

1578 **kwargs: Any) -> queries.DataCoordinateQueryResults: 

1579 """Query for data IDs matching user-provided criteria. 

1580 

1581 Parameters 

1582 ---------- 

1583 dimensions : `Dimension` or `str`, or iterable thereof 

1584 The dimensions of the data IDs to yield, as either `Dimension` 

1585 instances or `str`. Will be automatically expanded to a complete 

1586 `DimensionGraph`. 

1587 dataId : `dict` or `DataCoordinate`, optional 

1588 A data ID whose key-value pairs are used as equality constraints 

1589 in the query. 

1590 datasets : `Any`, optional 

1591 An expression that fully or partially identifies dataset types 

1592 that should constrain the yielded data IDs. For example, including 

1593 "raw" here would constrain the yielded ``instrument``, 

1594 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1595 those for which at least one "raw" dataset exists in 

1596 ``collections``. Allowed types include `DatasetType`, `str`, 

1597 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1598 expressions, ``...`` is not permitted - it doesn't make sense to 

1599 constrain data IDs on the existence of *all* datasets. 

1600 See :ref:`daf_butler_dataset_type_expressions` for more 

1601 information. 

1602 collections: `Any`, optional 

1603 An expression that fully or partially identifies the collections 

1604 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1605 thereof. `...` can be used to return all collections. Must be 

1606 provided if ``datasets`` is, and is ignored if it is not. See 

1607 :ref:`daf_butler_collection_expressions` for more information. 

1608 If not provided, ``self.default.collections`` is used. 

1609 where : `str`, optional 

1610 A string expression similar to a SQL WHERE clause. May involve 

1611 any column of a dimension table or (as a shortcut for the primary 

1612 key column of a dimension table) dimension name. See 

1613 :ref:`daf_butler_dimension_expressions` for more information. 

1614 components : `bool`, optional 

1615 If `True`, apply all dataset expression patterns to component 

1616 dataset type names as well. If `False`, never apply patterns to 

1617 components. If `None` (default), apply patterns to components only 

1618 if their parent datasets were not matched by the expression. 

1619 Fully-specified component datasets (`str` or `DatasetType` 

1620 instances) are always included. 

1621 bind : `Mapping`, optional 

1622 Mapping containing literal values that should be injected into the 

1623 ``where`` expression, keyed by the identifiers they replace. 

1624 check : `bool`, optional 

1625 If `True` (default) check the query for consistency before 

1626 executing it. This may reject some valid queries that resemble 

1627 common mistakes (e.g. queries for visits without specifying an 

1628 instrument). 

1629 **kwargs 

1630 Additional keyword arguments are forwarded to 

1631 `DataCoordinate.standardize` when processing the ``dataId`` 

1632 argument (and may be used to provide a constraining data ID even 

1633 when the ``dataId`` argument is `None`). 

1634 

1635 Returns 

1636 ------- 

1637 dataIds : `DataCoordinateQueryResults` 

1638 Data IDs matching the given query parameters. These are guaranteed 

1639 to identify all dimensions (`DataCoordinate.hasFull` returns 

1640 `True`), but will not contain `DimensionRecord` objects 

1641 (`DataCoordinate.hasRecords` returns `False`). Call 

1642 `DataCoordinateQueryResults.expanded` on the returned object to 

1643 fetch those (and consider using 

1644 `DataCoordinateQueryResults.materialize` on the returned object 

1645 first if the expected number of rows is very large). See 

1646 documentation for those methods for additional information. 

1647 

1648 Raises 

1649 ------ 

1650 TypeError 

1651 Raised if ``collections`` is `None`, ``self.defaults.collections`` 

1652 is `None`, and ``datasets`` is not `None`. 

1653 """ 

1654 dimensions = iterable(dimensions) 

1655 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1656 standardizedDatasetTypes = set() 

1657 requestedDimensions = self.dimensions.extract(dimensions) 

1658 queryDimensionNames = set(requestedDimensions.names) 

1659 if datasets is not None: 

1660 if collections is None: 

1661 if not self.defaults.collections: 

1662 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1663 collections = self.defaults.collections 

1664 else: 

1665 # Preprocess collections expression in case the original 

1666 # included single-pass iterators (we'll want to use it multiple 

1667 # times below). 

1668 collections = CollectionQuery.fromExpression(collections) 

1669 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1670 queryDimensionNames.update(datasetType.dimensions.names) 

1671 # If any matched dataset type is a component, just operate on 

1672 # its parent instead, because Registry doesn't know anything 

1673 # about what components exist, and here (unlike queryDatasets) 

1674 # we don't care about returning them. 

1675 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1676 if componentName is not None: 

1677 datasetType = self.getDatasetType(parentDatasetTypeName) 

1678 standardizedDatasetTypes.add(datasetType) 

1679 

1680 summary = queries.QuerySummary( 

1681 requested=DimensionGraph(self.dimensions, names=queryDimensionNames), 

1682 dataId=standardizedDataId, 

1683 expression=where, 

1684 bind=bind, 

1685 defaults=self.defaults.dataId, 

1686 check=check, 

1687 ) 

1688 builder = self.makeQueryBuilder(summary) 

1689 for datasetType in standardizedDatasetTypes: 

1690 builder.joinDataset(datasetType, collections, isResult=False) 

1691 query = builder.finish() 

1692 return queries.DataCoordinateQueryResults(self._db, query) 

1693 

1694 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1695 dataId: Optional[DataId] = None, 

1696 datasets: Any = None, 

1697 collections: Any = None, 

1698 where: Optional[str] = None, 

1699 components: Optional[bool] = None, 

1700 bind: Optional[Mapping[str, Any]] = None, 

1701 check: bool = True, 

1702 **kwargs: Any) -> Iterator[DimensionRecord]: 

1703 """Query for dimension information matching user-provided criteria. 

1704 

1705 Parameters 

1706 ---------- 

1707 element : `DimensionElement` or `str` 

1708 The dimension element to obtain records for. 

1709 dataId : `dict` or `DataCoordinate`, optional 

1710 A data ID whose key-value pairs are used as equality constraints 

1711 in the query. 

1712 datasets : `Any`, optional 

1713 An expression that fully or partially identifies dataset types 

1714 that should constrain the yielded records. See `queryDataIds` and 

1715 :ref:`daf_butler_dataset_type_expressions` for more information. 

1716 collections: `Any`, optional 

1717 An expression that fully or partially identifies the collections 

1718 to search for datasets. See `queryDataIds` and 

1719 :ref:`daf_butler_collection_expressions` for more information. 

1720 where : `str`, optional 

1721 A string expression similar to a SQL WHERE clause. See 

1722 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1723 information. 

1724 components : `bool`, optional 

1725 Whether to apply dataset expressions to components as well. 

1726 See `queryDataIds` for more information. 

1727 bind : `Mapping`, optional 

1728 Mapping containing literal values that should be injected into the 

1729 ``where`` expression, keyed by the identifiers they replace. 

1730 check : `bool`, optional 

1731 If `True` (default) check the query for consistency before 

1732 executing it. This may reject some valid queries that resemble 

1733 common mistakes (e.g. queries for visits without specifying an 

1734 instrument). 

1735 **kwargs 

1736 Additional keyword arguments are forwarded to 

1737 `DataCoordinate.standardize` when processing the ``dataId`` 

1738 argument (and may be used to provide a constraining data ID even 

1739 when the ``dataId`` argument is `None`). 

1740 

1741 Returns 

1742 ------- 

1743 dataIds : `DataCoordinateQueryResults` 

1744 Data IDs matching the given query parameters. 

1745 """ 

1746 if not isinstance(element, DimensionElement): 

1747 try: 

1748 element = self.dimensions[element] 

1749 except KeyError as e: 

1750 raise KeyError(f"No such dimension '{element}', available dimensions: " 

1751 + str(self.dimensions.getStaticElements())) from e 

1752 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections, 

1753 where=where, components=components, bind=bind, check=check, **kwargs) 

1754 return iter(self._managers.dimensions[element].fetch(dataIds)) 

1755 

1756 def queryDatasetAssociations( 

1757 self, 

1758 datasetType: Union[str, DatasetType], 

1759 collections: Any = ..., 

1760 *, 

1761 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1762 flattenChains: bool = False, 

1763 ) -> Iterator[DatasetAssociation]: 

1764 """Iterate over dataset-collection combinations where the dataset is in 

1765 the collection. 

1766 

1767 This method is a temporary placeholder for better support for 

1768 assocation results in `queryDatasets`. It will probably be 

1769 removed in the future, and should be avoided in production code 

1770 whenever possible. 

1771 

1772 Parameters 

1773 ---------- 

1774 datasetType : `DatasetType` or `str` 

1775 A dataset type object or the name of one. 

1776 collections: `Any`, optional 

1777 An expression that fully or partially identifies the collections 

1778 to search for datasets. See `queryCollections` and 

1779 :ref:`daf_butler_collection_expressions` for more information. 

1780 If not provided, ``self.default.collections`` is used. 

1781 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1782 If provided, only yield associations from collections of these 

1783 types. 

1784 flattenChains : `bool`, optional 

1785 If `True` (default) search in the children of 

1786 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED`` 

1787 collections are ignored. 

1788 

1789 Yields 

1790 ------ 

1791 association : `DatasetAssociation` 

1792 Object representing the relationship beween a single dataset and 

1793 a single collection. 

1794 

1795 Raises 

1796 ------ 

1797 TypeError 

1798 Raised if ``collections`` is `None` and 

1799 ``self.defaults.collections`` is `None`. 

1800 """ 

1801 if collections is None: 

1802 if not self.defaults.collections: 

1803 raise TypeError("No collections provided to findDataset, " 

1804 "and no defaults from registry construction.") 

1805 collections = self.defaults.collections 

1806 else: 

1807 collections = CollectionQuery.fromExpression(collections) 

1808 TimespanReprClass = self._db.getTimespanRepresentation() 

1809 if isinstance(datasetType, str): 

1810 storage = self._managers.datasets[datasetType] 

1811 else: 

1812 storage = self._managers.datasets[datasetType.name] 

1813 for collectionRecord in collections.iter(self._managers.collections, 

1814 collectionTypes=frozenset(collectionTypes), 

1815 flattenChains=flattenChains): 

1816 query = storage.select(collectionRecord) 

1817 if query is None: 

1818 continue 

1819 for row in self._db.query(query.combine()): 

1820 dataId = DataCoordinate.fromRequiredValues( 

1821 storage.datasetType.dimensions, 

1822 tuple(row[name] for name in storage.datasetType.dimensions.required.names) 

1823 ) 

1824 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1825 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, 

1826 conform=False) 

1827 if collectionRecord.type is CollectionType.CALIBRATION: 

1828 timespan = TimespanReprClass.extract(row) 

1829 else: 

1830 timespan = None 

1831 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1832 

1833 storageClasses: StorageClassFactory 

1834 """All storage classes known to the registry (`StorageClassFactory`). 

1835 """