Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 TYPE_CHECKING, 

41 Union, 

42) 

43 

44import sqlalchemy 

45 

46from ..core import ( 

47 ButlerURI, 

48 Config, 

49 DataCoordinate, 

50 DataCoordinateIterable, 

51 DataId, 

52 DatasetAssociation, 

53 DatasetRef, 

54 DatasetType, 

55 ddl, 

56 Dimension, 

57 DimensionConfig, 

58 DimensionElement, 

59 DimensionGraph, 

60 DimensionRecord, 

61 DimensionUniverse, 

62 NamedKeyMapping, 

63 NameLookupMapping, 

64 Progress, 

65 StorageClassFactory, 

66 Timespan, 

67) 

68from . import queries 

69from ..core.utils import iterable, transactional 

70from ._config import RegistryConfig 

71from ._collectionType import CollectionType 

72from ._defaults import RegistryDefaults 

73from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

74from .managers import RegistryManagerTypes, RegistryManagerInstances 

75from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

76from .summaries import CollectionSummary 

77from .interfaces import ChainedCollectionRecord, RunRecord 

78 

79if TYPE_CHECKING: 79 ↛ 80line 79 didn't jump to line 80, because the condition on line 79 was never true

80 from .._butlerConfig import ButlerConfig 

81 from .interfaces import ( 

82 Database, 

83 DatastoreRegistryBridgeManager, 

84 ) 

85 

86 

87_LOG = logging.getLogger(__name__) 

88 

89# key for dimensions configuration in attributes table 

90_DIMENSIONS_ATTR = "config:dimensions.json" 

91 

92 

93class Registry: 

94 """Registry interface. 

95 

96 Parameters 

97 ---------- 

98 database : `Database` 

99 Database instance to store Registry. 

100 defaults : `RegistryDefaults`, optional 

101 Default collection search path and/or output `~CollectionType.RUN` 

102 collection. 

103 attributes : `type` 

104 Manager class implementing `ButlerAttributeManager`. 

105 opaque : `type` 

106 Manager class implementing `OpaqueTableStorageManager`. 

107 dimensions : `type` 

108 Manager class implementing `DimensionRecordStorageManager`. 

109 collections : `type` 

110 Manager class implementing `CollectionManager`. 

111 datasets : `type` 

112 Manager class implementing `DatasetRecordStorageManager`. 

113 datastoreBridges : `type` 

114 Manager class implementing `DatastoreRegistryBridgeManager`. 

115 dimensionConfig : `DimensionConfig`, optional 

116 Dimension universe configuration, only used when ``create`` is True. 

117 writeable : `bool`, optional 

118 If True then Registry will support write operations. 

119 create : `bool`, optional 

120 If True then database schema will be initialized, it must be empty 

121 before instantiating Registry. 

122 """ 

123 

124 defaultConfigFile: Optional[str] = None 

125 """Path to configuration defaults. Accessed within the ``configs`` resource 

126 or relative to a search path. Can be None if no defaults specified. 

127 """ 

128 

129 @classmethod 

130 def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None, 

131 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

132 butlerRoot: Optional[str] = None) -> Registry: 

133 """Create registry database and return `Registry` instance. 

134 

135 This method initializes database contents, database must be empty 

136 prior to calling this method. 

137 

138 Parameters 

139 ---------- 

140 config : `RegistryConfig` or `str`, optional 

141 Registry configuration, if missing then default configuration will 

142 be loaded from registry.yaml. 

143 dimensionConfig : `DimensionConfig` or `str`, optional 

144 Dimensions configuration, if missing then default configuration 

145 will be loaded from dimensions.yaml. 

146 butlerRoot : `str`, optional 

147 Path to the repository root this `Registry` will manage. 

148 

149 Returns 

150 ------- 

151 registry : `Registry` 

152 A new `Registry` instance. 

153 """ 

154 if isinstance(config, str): 

155 config = RegistryConfig(config) 

156 elif config is None: 

157 config = RegistryConfig() 

158 elif not isinstance(config, RegistryConfig): 

159 raise TypeError(f"Incompatible Registry configuration type: {type(config)}") 

160 config.replaceRoot(butlerRoot) 

161 

162 if isinstance(dimensionConfig, str): 

163 dimensionConfig = DimensionConfig(config) 

164 elif dimensionConfig is None: 

165 dimensionConfig = DimensionConfig() 

166 elif not isinstance(dimensionConfig, DimensionConfig): 

167 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

168 

169 DatabaseClass = config.getDatabaseClass() 

170 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

171 namespace=config.get("namespace")) 

172 managerTypes = RegistryManagerTypes.fromConfig(config) 

173 managers = managerTypes.makeRepo(database, dimensionConfig) 

174 return cls(database, RegistryDefaults(), managers) 

175 

176 @classmethod 

177 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], 

178 butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True, 

179 defaults: Optional[RegistryDefaults] = None) -> Registry: 

180 """Create `Registry` subclass instance from `config`. 

181 

182 Registry database must be inbitialized prior to calling this method. 

183 

184 Parameters 

185 ---------- 

186 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

187 Registry configuration 

188 butlerRoot : `str` or `ButlerURI`, optional 

189 Path to the repository root this `Registry` will manage. 

190 writeable : `bool`, optional 

191 If `True` (default) create a read-write connection to the database. 

192 defaults : `RegistryDefaults`, optional 

193 Default collection search path and/or output `~CollectionType.RUN` 

194 collection. 

195 

196 Returns 

197 ------- 

198 registry : `Registry` (subclass) 

199 A new `Registry` subclass instance. 

200 """ 

201 if not isinstance(config, RegistryConfig): 

202 if isinstance(config, str) or isinstance(config, Config): 

203 config = RegistryConfig(config) 

204 else: 

205 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

206 config.replaceRoot(butlerRoot) 

207 DatabaseClass = config.getDatabaseClass() 

208 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

209 namespace=config.get("namespace"), writeable=writeable) 

210 managerTypes = RegistryManagerTypes.fromConfig(config) 

211 managers = managerTypes.loadRepo(database) 

212 if defaults is None: 

213 defaults = RegistryDefaults() 

214 return cls(database, defaults, managers) 

215 

216 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

217 self._db = database 

218 self._managers = managers 

219 self.storageClasses = StorageClassFactory() 

220 # Intentionally invoke property setter to initialize defaults. This 

221 # can only be done after most of the rest of Registry has already been 

222 # initialized, and must be done before the property getter is used. 

223 self.defaults = defaults 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"Registry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 """Return `True` if this registry allows write operations, and `False` 

233 otherwise. 

234 """ 

235 return self._db.isWriteable() 

236 

237 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

238 """Create a new `Registry` backed by the same data repository and 

239 connection as this one, but independent defaults. 

240 

241 Parameters 

242 ---------- 

243 defaults : `RegistryDefaults`, optional 

244 Default collections and data ID values for the new registry. If 

245 not provided, ``self.defaults`` will be used (but future changes 

246 to either registry's defaults will not affect the other). 

247 

248 Returns 

249 ------- 

250 copy : `Registry` 

251 A new `Registry` instance with its own defaults. 

252 

253 Notes 

254 ----- 

255 Because the new registry shares a connection with the original, they 

256 also share transaction state (despite the fact that their `transaction` 

257 context manager methods do not reflect this), and must be used with 

258 care. 

259 """ 

260 if defaults is None: 

261 # No need to copy, because `RegistryDefaults` is immutable; we 

262 # effectively copy on write. 

263 defaults = self.defaults 

264 return Registry(self._db, defaults, self._managers) 

265 

266 @property 

267 def dimensions(self) -> DimensionUniverse: 

268 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

269 """ 

270 return self._managers.dimensions.universe 

271 

272 @property 

273 def defaults(self) -> RegistryDefaults: 

274 """Default collection search path and/or output `~CollectionType.RUN` 

275 collection (`RegistryDefaults`). 

276 

277 This is an immutable struct whose components may not be set 

278 individually, but the entire struct can be set by assigning to this 

279 property. 

280 """ 

281 return self._defaults 

282 

283 @defaults.setter 

284 def defaults(self, value: RegistryDefaults) -> None: 

285 if value.run is not None: 

286 self.registerRun(value.run) 

287 value.finish(self) 

288 self._defaults = value 

289 

290 def refresh(self) -> None: 

291 """Refresh all in-memory state by querying the database. 

292 

293 This may be necessary to enable querying for entities added by other 

294 `Registry` instances after this one was constructed. 

295 """ 

296 self._managers.refresh() 

297 

298 @contextlib.contextmanager 

299 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

300 """Return a context manager that represents a transaction. 

301 """ 

302 try: 

303 with self._db.transaction(savepoint=savepoint): 

304 yield 

305 except BaseException: 

306 # TODO: this clears the caches sometimes when we wouldn't actually 

307 # need to. Can we avoid that? 

308 self._managers.dimensions.clearCaches() 

309 raise 

310 

311 def resetConnectionPool(self) -> None: 

312 """Reset SQLAlchemy connection pool for registry database. 

313 

314 This operation is useful when using registry with fork-based 

315 multiprocessing. To use registry across fork boundary one has to make 

316 sure that there are no currently active connections (no session or 

317 transaction is in progress) and connection pool is reset using this 

318 method. This method should be called by the child process immediately 

319 after the fork. 

320 """ 

321 self._db._engine.dispose() 

322 

323 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

324 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

325 other data repository client. 

326 

327 Opaque table records can be added via `insertOpaqueData`, retrieved via 

328 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

329 

330 Parameters 

331 ---------- 

332 tableName : `str` 

333 Logical name of the opaque table. This may differ from the 

334 actual name used in the database by a prefix and/or suffix. 

335 spec : `ddl.TableSpec` 

336 Specification for the table to be added. 

337 """ 

338 self._managers.opaque.register(tableName, spec) 

339 

340 @transactional 

341 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

342 """Insert records into an opaque table. 

343 

344 Parameters 

345 ---------- 

346 tableName : `str` 

347 Logical name of the opaque table. Must match the name used in a 

348 previous call to `registerOpaqueTable`. 

349 data 

350 Each additional positional argument is a dictionary that represents 

351 a single row to be added. 

352 """ 

353 self._managers.opaque[tableName].insert(*data) 

354 

355 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

356 """Retrieve records from an opaque table. 

357 

358 Parameters 

359 ---------- 

360 tableName : `str` 

361 Logical name of the opaque table. Must match the name used in a 

362 previous call to `registerOpaqueTable`. 

363 where 

364 Additional keyword arguments are interpreted as equality 

365 constraints that restrict the returned rows (combined with AND); 

366 keyword arguments are column names and values are the values they 

367 must have. 

368 

369 Yields 

370 ------ 

371 row : `dict` 

372 A dictionary representing a single result row. 

373 """ 

374 yield from self._managers.opaque[tableName].fetch(**where) 

375 

376 @transactional 

377 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

378 """Remove records from an opaque table. 

379 

380 Parameters 

381 ---------- 

382 tableName : `str` 

383 Logical name of the opaque table. Must match the name used in a 

384 previous call to `registerOpaqueTable`. 

385 where 

386 Additional keyword arguments are interpreted as equality 

387 constraints that restrict the deleted rows (combined with AND); 

388 keyword arguments are column names and values are the values they 

389 must have. 

390 """ 

391 self._managers.opaque[tableName].delete(where.keys(), where) 

392 

393 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED, 

394 doc: Optional[str] = None) -> None: 

395 """Add a new collection if one with the given name does not exist. 

396 

397 Parameters 

398 ---------- 

399 name : `str` 

400 The name of the collection to create. 

401 type : `CollectionType` 

402 Enum value indicating the type of collection to create. 

403 doc : `str`, optional 

404 Documentation string for the collection. 

405 

406 Notes 

407 ----- 

408 This method cannot be called within transactions, as it needs to be 

409 able to perform its own transaction to be concurrent. 

410 """ 

411 self._managers.collections.register(name, type, doc=doc) 

412 

413 def getCollectionType(self, name: str) -> CollectionType: 

414 """Return an enumeration value indicating the type of the given 

415 collection. 

416 

417 Parameters 

418 ---------- 

419 name : `str` 

420 The name of the collection. 

421 

422 Returns 

423 ------- 

424 type : `CollectionType` 

425 Enum value indicating the type of this collection. 

426 

427 Raises 

428 ------ 

429 MissingCollectionError 

430 Raised if no collection with the given name exists. 

431 """ 

432 return self._managers.collections.find(name).type 

433 

434 def registerRun(self, name: str, doc: Optional[str] = None) -> None: 

435 """Add a new run if one with the given name does not exist. 

436 

437 Parameters 

438 ---------- 

439 name : `str` 

440 The name of the run to create. 

441 doc : `str`, optional 

442 Documentation string for the collection. 

443 

444 Notes 

445 ----- 

446 This method cannot be called within transactions, as it needs to be 

447 able to perform its own transaction to be concurrent. 

448 """ 

449 self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

450 

451 @transactional 

452 def removeCollection(self, name: str) -> None: 

453 """Completely remove the given collection. 

454 

455 Parameters 

456 ---------- 

457 name : `str` 

458 The name of the collection to remove. 

459 

460 Raises 

461 ------ 

462 MissingCollectionError 

463 Raised if no collection with the given name exists. 

464 

465 Notes 

466 ----- 

467 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

468 in it are also fully removed. This requires that those datasets be 

469 removed (or at least trashed) from any datastores that hold them first. 

470 

471 A collection may not be deleted as long as it is referenced by a 

472 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

473 be deleted or redefined first. 

474 """ 

475 self._managers.collections.remove(name) 

476 

477 def getCollectionChain(self, parent: str) -> CollectionSearch: 

478 """Return the child collections in a `~CollectionType.CHAINED` 

479 collection. 

480 

481 Parameters 

482 ---------- 

483 parent : `str` 

484 Name of the chained collection. Must have already been added via 

485 a call to `Registry.registerCollection`. 

486 

487 Returns 

488 ------- 

489 children : `CollectionSearch` 

490 An object that defines the search path of the collection. 

491 See :ref:`daf_butler_collection_expressions` for more information. 

492 

493 Raises 

494 ------ 

495 MissingCollectionError 

496 Raised if ``parent`` does not exist in the `Registry`. 

497 TypeError 

498 Raised if ``parent`` does not correspond to a 

499 `~CollectionType.CHAINED` collection. 

500 """ 

501 record = self._managers.collections.find(parent) 

502 if record.type is not CollectionType.CHAINED: 

503 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

504 assert isinstance(record, ChainedCollectionRecord) 

505 return record.children 

506 

507 @transactional 

508 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

509 """Define or redefine a `~CollectionType.CHAINED` collection. 

510 

511 Parameters 

512 ---------- 

513 parent : `str` 

514 Name of the chained collection. Must have already been added via 

515 a call to `Registry.registerCollection`. 

516 children : `Any` 

517 An expression defining an ordered search of child collections, 

518 generally an iterable of `str`; see 

519 :ref:`daf_butler_collection_expressions` for more information. 

520 flatten : `bool`, optional 

521 If `True` (`False` is default), recursively flatten out any nested 

522 `~CollectionType.CHAINED` collections in ``children`` first. 

523 

524 Raises 

525 ------ 

526 MissingCollectionError 

527 Raised when any of the given collections do not exist in the 

528 `Registry`. 

529 TypeError 

530 Raised if ``parent`` does not correspond to a 

531 `~CollectionType.CHAINED` collection. 

532 ValueError 

533 Raised if the given collections contains a cycle. 

534 """ 

535 record = self._managers.collections.find(parent) 

536 if record.type is not CollectionType.CHAINED: 

537 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

538 assert isinstance(record, ChainedCollectionRecord) 

539 children = CollectionSearch.fromExpression(children) 

540 if children != record.children or flatten: 

541 record.update(self._managers.collections, children, flatten=flatten) 

542 

543 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

544 """Retrieve the documentation string for a collection. 

545 

546 Parameters 

547 ---------- 

548 name : `str` 

549 Name of the collection. 

550 

551 Returns 

552 ------- 

553 docs : `str` or `None` 

554 Docstring for the collection with the given name. 

555 """ 

556 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

557 

558 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

559 """Set the documentation string for a collection. 

560 

561 Parameters 

562 ---------- 

563 name : `str` 

564 Name of the collection. 

565 docs : `str` or `None` 

566 Docstring for the collection with the given name; will replace any 

567 existing docstring. Passing `None` will remove any existing 

568 docstring. 

569 """ 

570 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

571 

572 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

573 """Return a summary for the given collection. 

574 

575 Parameters 

576 ---------- 

577 collection : `str` 

578 Name of the collection for which a summary is to be retrieved. 

579 

580 Returns 

581 ------- 

582 summary : `CollectionSummary` 

583 Summary of the dataset types and governor dimension values in 

584 this collection. 

585 """ 

586 record = self._managers.collections.find(collection) 

587 return self._managers.datasets.getCollectionSummary(record) 

588 

589 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

590 """ 

591 Add a new `DatasetType` to the Registry. 

592 

593 It is not an error to register the same `DatasetType` twice. 

594 

595 Parameters 

596 ---------- 

597 datasetType : `DatasetType` 

598 The `DatasetType` to be added. 

599 

600 Returns 

601 ------- 

602 inserted : `bool` 

603 `True` if ``datasetType`` was inserted, `False` if an identical 

604 existing `DatsetType` was found. Note that in either case the 

605 DatasetType is guaranteed to be defined in the Registry 

606 consistently with the given definition. 

607 

608 Raises 

609 ------ 

610 ValueError 

611 Raised if the dimensions or storage class are invalid. 

612 ConflictingDefinitionError 

613 Raised if this DatasetType is already registered with a different 

614 definition. 

615 

616 Notes 

617 ----- 

618 This method cannot be called within transactions, as it needs to be 

619 able to perform its own transaction to be concurrent. 

620 """ 

621 _, inserted = self._managers.datasets.register(datasetType) 

622 return inserted 

623 

624 def removeDatasetType(self, name: str) -> None: 

625 """Remove the named `DatasetType` from the registry. 

626 

627 .. warning:: 

628 

629 Registry caches the dataset type definitions. This means that 

630 deleting the dataset type definition may result in unexpected 

631 behavior from other butler processes that are active that have 

632 not seen the deletion. 

633 

634 Parameters 

635 ---------- 

636 name : `str` 

637 Name of the type to be removed. 

638 

639 Raises 

640 ------ 

641 lsst.daf.butler.registry.OrphanedRecordError 

642 Raised if an attempt is made to remove the dataset type definition 

643 when there are already datasets associated with it. 

644 

645 Notes 

646 ----- 

647 If the dataset type is not registered the method will return without 

648 action. 

649 """ 

650 self._managers.datasets.remove(name) 

651 

652 def getDatasetType(self, name: str) -> DatasetType: 

653 """Get the `DatasetType`. 

654 

655 Parameters 

656 ---------- 

657 name : `str` 

658 Name of the type. 

659 

660 Returns 

661 ------- 

662 type : `DatasetType` 

663 The `DatasetType` associated with the given name. 

664 

665 Raises 

666 ------ 

667 KeyError 

668 Requested named DatasetType could not be found in registry. 

669 """ 

670 return self._managers.datasets[name].datasetType 

671 

672 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

673 collections: Any = None, timespan: Optional[Timespan] = None, 

674 **kwargs: Any) -> Optional[DatasetRef]: 

675 """Find a dataset given its `DatasetType` and data ID. 

676 

677 This can be used to obtain a `DatasetRef` that permits the dataset to 

678 be read from a `Datastore`. If the dataset is a component and can not 

679 be found using the provided dataset type, a dataset ref for the parent 

680 will be returned instead but with the correct dataset type. 

681 

682 Parameters 

683 ---------- 

684 datasetType : `DatasetType` or `str` 

685 A `DatasetType` or the name of one. 

686 dataId : `dict` or `DataCoordinate`, optional 

687 A `dict`-like object containing the `Dimension` links that identify 

688 the dataset within a collection. 

689 collections, optional. 

690 An expression that fully or partially identifies the collections to 

691 search for the dataset; see 

692 :ref:`daf_butler_collection_expressions` for more information. 

693 Defaults to ``self.defaults.collections``. 

694 timespan : `Timespan`, optional 

695 A timespan that the validity range of the dataset must overlap. 

696 If not provided, any `~CollectionType.CALIBRATION` collections 

697 matched by the ``collections`` argument will not be searched. 

698 **kwargs 

699 Additional keyword arguments passed to 

700 `DataCoordinate.standardize` to convert ``dataId`` to a true 

701 `DataCoordinate` or augment an existing one. 

702 

703 Returns 

704 ------- 

705 ref : `DatasetRef` 

706 A reference to the dataset, or `None` if no matching Dataset 

707 was found. 

708 

709 Raises 

710 ------ 

711 TypeError 

712 Raised if ``collections`` is `None` and 

713 ``self.defaults.collections`` is `None`. 

714 LookupError 

715 Raised if one or more data ID keys are missing. 

716 KeyError 

717 Raised if the dataset type does not exist. 

718 MissingCollectionError 

719 Raised if any of ``collections`` does not exist in the registry. 

720 

721 Notes 

722 ----- 

723 This method simply returns `None` and does not raise an exception even 

724 when the set of collections searched is intrinsically incompatible with 

725 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

726 only `~CollectionType.CALIBRATION` collections are being searched. 

727 This may make it harder to debug some lookup failures, but the behavior 

728 is intentional; we consider it more important that failed searches are 

729 reported consistently, regardless of the reason, and that adding 

730 additional collections that do not contain a match to the search path 

731 never changes the behavior. 

732 """ 

733 if isinstance(datasetType, DatasetType): 

734 storage = self._managers.datasets[datasetType.name] 

735 else: 

736 storage = self._managers.datasets[datasetType] 

737 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

738 universe=self.dimensions, defaults=self.defaults.dataId, 

739 **kwargs) 

740 if collections is None: 

741 if not self.defaults.collections: 

742 raise TypeError("No collections provided to findDataset, " 

743 "and no defaults from registry construction.") 

744 collections = self.defaults.collections 

745 else: 

746 collections = CollectionSearch.fromExpression(collections) 

747 for collectionRecord in collections.iter(self._managers.collections): 

748 if (collectionRecord.type is CollectionType.CALIBRATION 

749 and (not storage.datasetType.isCalibration() or timespan is None)): 

750 continue 

751 result = storage.find(collectionRecord, dataId, timespan=timespan) 

752 if result is not None: 

753 return result 

754 

755 return None 

756 

757 @transactional 

758 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

759 run: Optional[str] = None, expand: bool = True) -> List[DatasetRef]: 

760 """Insert one or more datasets into the `Registry` 

761 

762 This always adds new datasets; to associate existing datasets with 

763 a new collection, use ``associate``. 

764 

765 Parameters 

766 ---------- 

767 datasetType : `DatasetType` or `str` 

768 A `DatasetType` or the name of one. 

769 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

770 Dimension-based identifiers for the new datasets. 

771 run : `str`, optional 

772 The name of the run that produced the datasets. Defaults to 

773 ``self.defaults.run``. 

774 expand : `bool`, optional 

775 If `True` (default), expand data IDs as they are inserted. This is 

776 necessary in general to allow datastore to generate file templates, 

777 but it may be disabled if the caller can guarantee this is 

778 unnecessary. 

779 

780 Returns 

781 ------- 

782 refs : `list` of `DatasetRef` 

783 Resolved `DatasetRef` instances for all given data IDs (in the same 

784 order). 

785 

786 Raises 

787 ------ 

788 TypeError 

789 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

790 ConflictingDefinitionError 

791 If a dataset with the same dataset type and data ID as one of those 

792 given already exists in ``run``. 

793 MissingCollectionError 

794 Raised if ``run`` does not exist in the registry. 

795 """ 

796 if isinstance(datasetType, DatasetType): 

797 storage = self._managers.datasets.find(datasetType.name) 

798 if storage is None: 

799 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

800 else: 

801 storage = self._managers.datasets.find(datasetType) 

802 if storage is None: 

803 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

804 if run is None: 

805 if self.defaults.run is None: 

806 raise TypeError("No run provided to insertDatasets, " 

807 "and no default from registry construction.") 

808 run = self.defaults.run 

809 runRecord = self._managers.collections.find(run) 

810 if runRecord.type is not CollectionType.RUN: 

811 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.") 

812 assert isinstance(runRecord, RunRecord) 

813 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

814 if expand: 

815 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

816 for dataId in progress.wrap(dataIds, 

817 f"Expanding {storage.datasetType.name} data IDs")] 

818 else: 

819 expandedDataIds = [DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) 

820 for dataId in dataIds] 

821 try: 

822 refs = list(storage.insert(runRecord, expandedDataIds)) 

823 except sqlalchemy.exc.IntegrityError as err: 

824 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

825 f"one or more datasets of type {storage.datasetType} into " 

826 f"collection '{run}'. " 

827 f"This probably means a dataset with the same data ID " 

828 f"and dataset type already exists, but it may also mean a " 

829 f"dimension row is missing.") from err 

830 return refs 

831 

832 def getDataset(self, id: int) -> Optional[DatasetRef]: 

833 """Retrieve a Dataset entry. 

834 

835 Parameters 

836 ---------- 

837 id : `int` 

838 The unique identifier for the dataset. 

839 

840 Returns 

841 ------- 

842 ref : `DatasetRef` or `None` 

843 A ref to the Dataset, or `None` if no matching Dataset 

844 was found. 

845 """ 

846 ref = self._managers.datasets.getDatasetRef(id) 

847 if ref is None: 

848 return None 

849 return ref 

850 

851 @transactional 

852 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

853 """Remove datasets from the Registry. 

854 

855 The datasets will be removed unconditionally from all collections, and 

856 any `Quantum` that consumed this dataset will instead be marked with 

857 having a NULL input. `Datastore` records will *not* be deleted; the 

858 caller is responsible for ensuring that the dataset has already been 

859 removed from all Datastores. 

860 

861 Parameters 

862 ---------- 

863 refs : `Iterable` of `DatasetRef` 

864 References to the datasets to be removed. Must include a valid 

865 ``id`` attribute, and should be considered invalidated upon return. 

866 

867 Raises 

868 ------ 

869 AmbiguousDatasetError 

870 Raised if any ``ref.id`` is `None`. 

871 OrphanedRecordError 

872 Raised if any dataset is still present in any `Datastore`. 

873 """ 

874 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

875 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

876 desc="Removing datasets by type"): 

877 storage = self._managers.datasets.find(datasetType.name) 

878 assert storage is not None 

879 try: 

880 storage.delete(refsForType) 

881 except sqlalchemy.exc.IntegrityError as err: 

882 raise OrphanedRecordError("One or more datasets is still " 

883 "present in one or more Datastores.") from err 

884 

885 @transactional 

886 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

887 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

888 

889 If a DatasetRef with the same exact integer ID is already in a 

890 collection nothing is changed. If a `DatasetRef` with the same 

891 `DatasetType` and data ID but with different integer ID 

892 exists in the collection, `ConflictingDefinitionError` is raised. 

893 

894 Parameters 

895 ---------- 

896 collection : `str` 

897 Indicates the collection the datasets should be associated with. 

898 refs : `Iterable` [ `DatasetRef` ] 

899 An iterable of resolved `DatasetRef` instances that already exist 

900 in this `Registry`. 

901 

902 Raises 

903 ------ 

904 ConflictingDefinitionError 

905 If a Dataset with the given `DatasetRef` already exists in the 

906 given collection. 

907 AmbiguousDatasetError 

908 Raised if ``any(ref.id is None for ref in refs)``. 

909 MissingCollectionError 

910 Raised if ``collection`` does not exist in the registry. 

911 TypeError 

912 Raise adding new datasets to the given ``collection`` is not 

913 allowed. 

914 """ 

915 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

916 collectionRecord = self._managers.collections.find(collection) 

917 if collectionRecord.type is not CollectionType.TAGGED: 

918 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

919 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

920 desc="Associating datasets by type"): 

921 storage = self._managers.datasets.find(datasetType.name) 

922 assert storage is not None 

923 try: 

924 storage.associate(collectionRecord, refsForType) 

925 except sqlalchemy.exc.IntegrityError as err: 

926 raise ConflictingDefinitionError( 

927 f"Constraint violation while associating dataset of type {datasetType.name} with " 

928 f"collection {collection}. This probably means that one or more datasets with the same " 

929 f"dataset type and data ID already exist in the collection, but it may also indicate " 

930 f"that the datasets do not exist." 

931 ) from err 

932 

933 @transactional 

934 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

935 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

936 

937 ``collection`` and ``ref`` combinations that are not currently 

938 associated are silently ignored. 

939 

940 Parameters 

941 ---------- 

942 collection : `str` 

943 The collection the datasets should no longer be associated with. 

944 refs : `Iterable` [ `DatasetRef` ] 

945 An iterable of resolved `DatasetRef` instances that already exist 

946 in this `Registry`. 

947 

948 Raises 

949 ------ 

950 AmbiguousDatasetError 

951 Raised if any of the given dataset references is unresolved. 

952 MissingCollectionError 

953 Raised if ``collection`` does not exist in the registry. 

954 TypeError 

955 Raise adding new datasets to the given ``collection`` is not 

956 allowed. 

957 """ 

958 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

959 collectionRecord = self._managers.collections.find(collection) 

960 if collectionRecord.type is not CollectionType.TAGGED: 

961 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

962 "expected TAGGED.") 

963 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

964 desc="Disassociating datasets by type"): 

965 storage = self._managers.datasets.find(datasetType.name) 

966 assert storage is not None 

967 storage.disassociate(collectionRecord, refsForType) 

968 

969 @transactional 

970 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

971 """Associate one or more datasets with a calibration collection and a 

972 validity range within it. 

973 

974 Parameters 

975 ---------- 

976 collection : `str` 

977 The name of an already-registered `~CollectionType.CALIBRATION` 

978 collection. 

979 refs : `Iterable` [ `DatasetRef` ] 

980 Datasets to be associated. 

981 timespan : `Timespan` 

982 The validity range for these datasets within the collection. 

983 

984 Raises 

985 ------ 

986 AmbiguousDatasetError 

987 Raised if any of the given `DatasetRef` instances is unresolved. 

988 ConflictingDefinitionError 

989 Raised if the collection already contains a different dataset with 

990 the same `DatasetType` and data ID and an overlapping validity 

991 range. 

992 TypeError 

993 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

994 collection or if one or more datasets are of a dataset type for 

995 which `DatasetType.isCalibration` returns `False`. 

996 """ 

997 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

998 collectionRecord = self._managers.collections.find(collection) 

999 for datasetType, refsForType in progress.iter_item_chunks(DatasetRef.groupByType(refs).items(), 

1000 desc="Certifying datasets by type"): 

1001 storage = self._managers.datasets[datasetType.name] 

1002 storage.certify(collectionRecord, refsForType, timespan) 

1003 

1004 @transactional 

1005 def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *, 

1006 dataIds: Optional[Iterable[DataId]] = None) -> None: 

1007 """Remove or adjust datasets to clear a validity range within a 

1008 calibration collection. 

1009 

1010 Parameters 

1011 ---------- 

1012 collection : `str` 

1013 The name of an already-registered `~CollectionType.CALIBRATION` 

1014 collection. 

1015 datasetType : `str` or `DatasetType` 

1016 Name or `DatasetType` instance for the datasets to be decertified. 

1017 timespan : `Timespan`, optional 

1018 The validity range to remove datasets from within the collection. 

1019 Datasets that overlap this range but are not contained by it will 

1020 have their validity ranges adjusted to not overlap it, which may 

1021 split a single dataset validity range into two. 

1022 dataIds : `Iterable` [ `DataId` ], optional 

1023 Data IDs that should be decertified within the given validity range 

1024 If `None`, all data IDs for ``self.datasetType`` will be 

1025 decertified. 

1026 

1027 Raises 

1028 ------ 

1029 TypeError 

1030 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1031 collection or if ``datasetType.isCalibration() is False``. 

1032 """ 

1033 collectionRecord = self._managers.collections.find(collection) 

1034 if isinstance(datasetType, str): 

1035 storage = self._managers.datasets[datasetType] 

1036 else: 

1037 storage = self._managers.datasets[datasetType.name] 

1038 standardizedDataIds = None 

1039 if dataIds is not None: 

1040 standardizedDataIds = [DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) 

1041 for d in dataIds] 

1042 storage.decertify(collectionRecord, timespan, dataIds=standardizedDataIds) 

1043 

1044 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1045 """Return an object that allows a new `Datastore` instance to 

1046 communicate with this `Registry`. 

1047 

1048 Returns 

1049 ------- 

1050 manager : `DatastoreRegistryBridgeManager` 

1051 Object that mediates communication between this `Registry` and its 

1052 associated datastores. 

1053 """ 

1054 return self._managers.datastores 

1055 

1056 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1057 """Retrieve datastore locations for a given dataset. 

1058 

1059 Parameters 

1060 ---------- 

1061 ref : `DatasetRef` 

1062 A reference to the dataset for which to retrieve storage 

1063 information. 

1064 

1065 Returns 

1066 ------- 

1067 datastores : `Iterable` [ `str` ] 

1068 All the matching datastores holding this dataset. 

1069 

1070 Raises 

1071 ------ 

1072 AmbiguousDatasetError 

1073 Raised if ``ref.id`` is `None`. 

1074 """ 

1075 return self._managers.datastores.findDatastores(ref) 

1076 

1077 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

1078 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

1079 withDefaults: bool = True, 

1080 **kwargs: Any) -> DataCoordinate: 

1081 """Expand a dimension-based data ID to include additional information. 

1082 

1083 Parameters 

1084 ---------- 

1085 dataId : `DataCoordinate` or `dict`, optional 

1086 Data ID to be expanded; augmented and overridden by ``kwds``. 

1087 graph : `DimensionGraph`, optional 

1088 Set of dimensions for the expanded ID. If `None`, the dimensions 

1089 will be inferred from the keys of ``dataId`` and ``kwds``. 

1090 Dimensions that are in ``dataId`` or ``kwds`` but not in ``graph`` 

1091 are silently ignored, providing a way to extract and expand a 

1092 subset of a data ID. 

1093 records : `Mapping` [`str`, `DimensionRecord`], optional 

1094 Dimension record data to use before querying the database for that 

1095 data, keyed by element name. 

1096 withDefaults : `bool`, optional 

1097 Utilize ``self.defaults.dataId`` to fill in missing governor 

1098 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1099 used). 

1100 **kwargs 

1101 Additional keywords are treated like additional key-value pairs for 

1102 ``dataId``, extending and overriding 

1103 

1104 Returns 

1105 ------- 

1106 expanded : `DataCoordinate` 

1107 A data ID that includes full metadata for all of the dimensions it 

1108 identifieds, i.e. guarantees that ``expanded.hasRecords()`` and 

1109 ``expanded.hasFull()`` both return `True`. 

1110 """ 

1111 if not withDefaults: 

1112 defaults = None 

1113 else: 

1114 defaults = self.defaults.dataId 

1115 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, 

1116 defaults=defaults, **kwargs) 

1117 if standardized.hasRecords(): 

1118 return standardized 

1119 if records is None: 

1120 records = {} 

1121 elif isinstance(records, NamedKeyMapping): 

1122 records = records.byName() 

1123 else: 

1124 records = dict(records) 

1125 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1126 records.update(dataId.records.byName()) 

1127 keys = standardized.byName() 

1128 for element in standardized.graph.primaryKeyTraversalOrder: 

1129 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

1130 if record is ...: 

1131 if isinstance(element, Dimension) and keys.get(element.name) is None: 

1132 if element in standardized.graph.required: 

1133 raise LookupError( 

1134 f"No value or null value for required dimension {element.name}." 

1135 ) 

1136 keys[element.name] = None 

1137 record = None 

1138 else: 

1139 storage = self._managers.dimensions[element] 

1140 dataIdSet = DataCoordinateIterable.fromScalar( 

1141 DataCoordinate.standardize(keys, graph=element.graph) 

1142 ) 

1143 fetched = tuple(storage.fetch(dataIdSet)) 

1144 try: 

1145 (record,) = fetched 

1146 except ValueError: 

1147 record = None 

1148 records[element.name] = record 

1149 if record is not None: 

1150 for d in element.implied: 

1151 value = getattr(record, d.name) 

1152 if keys.setdefault(d.name, value) != value: 

1153 raise InconsistentDataIdError( 

1154 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1155 f"but {element.name} implies {d.name}={value!r}." 

1156 ) 

1157 else: 

1158 if element in standardized.graph.required: 

1159 raise LookupError( 

1160 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1161 ) 

1162 if element.alwaysJoin: 

1163 raise InconsistentDataIdError( 

1164 f"Could not fetch record for element {element.name} via keys {keys}, ", 

1165 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

1166 "related." 

1167 ) 

1168 for d in element.implied: 

1169 keys.setdefault(d.name, None) 

1170 records.setdefault(d.name, None) 

1171 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

1172 

1173 def insertDimensionData(self, element: Union[DimensionElement, str], 

1174 *data: Union[Mapping[str, Any], DimensionRecord], 

1175 conform: bool = True) -> None: 

1176 """Insert one or more dimension records into the database. 

1177 

1178 Parameters 

1179 ---------- 

1180 element : `DimensionElement` or `str` 

1181 The `DimensionElement` or name thereof that identifies the table 

1182 records will be inserted into. 

1183 data : `dict` or `DimensionRecord` (variadic) 

1184 One or more records to insert. 

1185 conform : `bool`, optional 

1186 If `False` (`True` is default) perform no checking or conversions, 

1187 and assume that ``element`` is a `DimensionElement` instance and 

1188 ``data`` is a one or more `DimensionRecord` instances of the 

1189 appropriate subclass. 

1190 """ 

1191 if conform: 

1192 if isinstance(element, str): 

1193 element = self.dimensions[element] 

1194 records = [row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1195 for row in data] 

1196 else: 

1197 # Ignore typing since caller said to trust them with conform=False. 

1198 records = data # type: ignore 

1199 storage = self._managers.dimensions[element] # type: ignore 

1200 storage.insert(*records) 

1201 

1202 def syncDimensionData(self, element: Union[DimensionElement, str], 

1203 row: Union[Mapping[str, Any], DimensionRecord], 

1204 conform: bool = True) -> bool: 

1205 """Synchronize the given dimension record with the database, inserting 

1206 if it does not already exist and comparing values if it does. 

1207 

1208 Parameters 

1209 ---------- 

1210 element : `DimensionElement` or `str` 

1211 The `DimensionElement` or name thereof that identifies the table 

1212 records will be inserted into. 

1213 row : `dict` or `DimensionRecord` 

1214 The record to insert. 

1215 conform : `bool`, optional 

1216 If `False` (`True` is default) perform no checking or conversions, 

1217 and assume that ``element`` is a `DimensionElement` instance and 

1218 ``data`` is a one or more `DimensionRecord` instances of the 

1219 appropriate subclass. 

1220 

1221 Returns 

1222 ------- 

1223 inserted : `bool` 

1224 `True` if a new row was inserted, `False` otherwise. 

1225 

1226 Raises 

1227 ------ 

1228 ConflictingDefinitionError 

1229 Raised if the record exists in the database (according to primary 

1230 key lookup) but is inconsistent with the given one. 

1231 """ 

1232 if conform: 

1233 if isinstance(element, str): 

1234 element = self.dimensions[element] 

1235 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1236 else: 

1237 # Ignore typing since caller said to trust them with conform=False. 

1238 record = row # type: ignore 

1239 storage = self._managers.dimensions[element] # type: ignore 

1240 return storage.sync(record) 

1241 

1242 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

1243 ) -> Iterator[DatasetType]: 

1244 """Iterate over the dataset types whose names match an expression. 

1245 

1246 Parameters 

1247 ---------- 

1248 expression : `Any`, optional 

1249 An expression that fully or partially identifies the dataset types 

1250 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1251 `...` can be used to return all dataset types, and is the default. 

1252 See :ref:`daf_butler_dataset_type_expressions` for more 

1253 information. 

1254 components : `bool`, optional 

1255 If `True`, apply all expression patterns to component dataset type 

1256 names as well. If `False`, never apply patterns to components. 

1257 If `None` (default), apply patterns to components only if their 

1258 parent datasets were not matched by the expression. 

1259 Fully-specified component datasets (`str` or `DatasetType` 

1260 instances) are always included. 

1261 

1262 Yields 

1263 ------ 

1264 datasetType : `DatasetType` 

1265 A `DatasetType` instance whose name matches ``expression``. 

1266 """ 

1267 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

1268 if wildcard is Ellipsis: 

1269 for datasetType in self._managers.datasets: 

1270 # The dataset type can no longer be a component 

1271 yield datasetType 

1272 if components: 

1273 # Automatically create the component dataset types 

1274 try: 

1275 componentsForDatasetType = datasetType.makeAllComponentDatasetTypes() 

1276 except KeyError as err: 

1277 _LOG.warning(f"Could not load storage class {err} for {datasetType.name}; " 

1278 "if it has components they will not be included in query results.") 

1279 else: 

1280 yield from componentsForDatasetType 

1281 return 

1282 done: Set[str] = set() 

1283 for name in wildcard.strings: 

1284 storage = self._managers.datasets.find(name) 

1285 if storage is not None: 

1286 done.add(storage.datasetType.name) 

1287 yield storage.datasetType 

1288 if wildcard.patterns: 

1289 # If components (the argument) is None, we'll save component 

1290 # dataset that we might want to match, but only if their parents 

1291 # didn't get included. 

1292 componentsForLater = [] 

1293 for registeredDatasetType in self._managers.datasets: 

1294 # Components are not stored in registry so expand them here 

1295 allDatasetTypes = [registeredDatasetType] 

1296 try: 

1297 allDatasetTypes.extend(registeredDatasetType.makeAllComponentDatasetTypes()) 

1298 except KeyError as err: 

1299 _LOG.warning(f"Could not load storage class {err} for {registeredDatasetType.name}; " 

1300 "if it has components they will not be included in query results.") 

1301 for datasetType in allDatasetTypes: 

1302 if datasetType.name in done: 

1303 continue 

1304 parentName, componentName = datasetType.nameAndComponent() 

1305 if componentName is not None and not components: 

1306 if components is None and parentName not in done: 

1307 componentsForLater.append(datasetType) 

1308 continue 

1309 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1310 done.add(datasetType.name) 

1311 yield datasetType 

1312 # Go back and try to match saved components. 

1313 for datasetType in componentsForLater: 

1314 parentName, _ = datasetType.nameAndComponent() 

1315 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1316 yield datasetType 

1317 

1318 def queryCollections(self, expression: Any = ..., 

1319 datasetType: Optional[DatasetType] = None, 

1320 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1321 flattenChains: bool = False, 

1322 includeChains: Optional[bool] = None) -> Iterator[str]: 

1323 """Iterate over the collections whose names match an expression. 

1324 

1325 Parameters 

1326 ---------- 

1327 expression : `Any`, optional 

1328 An expression that identifies the collections to return, such as a 

1329 `str` (for full matches), `re.Pattern` (for partial matches), or 

1330 iterable thereof. `...` can be used to return all collections, 

1331 and is the default. See :ref:`daf_butler_collection_expressions` 

1332 for more information. 

1333 datasetType : `DatasetType`, optional 

1334 If provided, only yield collections that may contain datasets of 

1335 this type. This is a conservative approximation in general; it may 

1336 yield collections that do not have any such datasets. 

1337 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1338 If provided, only yield collections of these types. 

1339 flattenChains : `bool`, optional 

1340 If `True` (`False` is default), recursively yield the child 

1341 collections of matching `~CollectionType.CHAINED` collections. 

1342 includeChains : `bool`, optional 

1343 If `True`, yield records for matching `~CollectionType.CHAINED` 

1344 collections. Default is the opposite of ``flattenChains``: include 

1345 either CHAINED collections or their children, but not both. 

1346 

1347 Yields 

1348 ------ 

1349 collection : `str` 

1350 The name of a collection that matches ``expression``. 

1351 """ 

1352 # Right now the datasetTypes argument is completely ignored, but that 

1353 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1354 # ticket will take care of that. 

1355 query = CollectionQuery.fromExpression(expression) 

1356 for record in query.iter(self._managers.collections, collectionTypes=frozenset(collectionTypes), 

1357 flattenChains=flattenChains, includeChains=includeChains): 

1358 yield record.name 

1359 

1360 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder: 

1361 """Return a `QueryBuilder` instance capable of constructing and 

1362 managing more complex queries than those obtainable via `Registry` 

1363 interfaces. 

1364 

1365 This is an advanced interface; downstream code should prefer 

1366 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1367 are sufficient. 

1368 

1369 Parameters 

1370 ---------- 

1371 summary : `queries.QuerySummary` 

1372 Object describing and categorizing the full set of dimensions that 

1373 will be included in the query. 

1374 

1375 Returns 

1376 ------- 

1377 builder : `queries.QueryBuilder` 

1378 Object that can be used to construct and perform advanced queries. 

1379 """ 

1380 return queries.QueryBuilder( 

1381 summary, 

1382 queries.RegistryManagers( 

1383 collections=self._managers.collections, 

1384 dimensions=self._managers.dimensions, 

1385 datasets=self._managers.datasets, 

1386 TimespanReprClass=self._db.getTimespanRepresentation(), 

1387 ), 

1388 ) 

1389 

1390 def queryDatasets(self, datasetType: Any, *, 

1391 collections: Any = None, 

1392 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1393 dataId: Optional[DataId] = None, 

1394 where: Optional[str] = None, 

1395 findFirst: bool = False, 

1396 components: Optional[bool] = None, 

1397 bind: Optional[Mapping[str, Any]] = None, 

1398 check: bool = True, 

1399 **kwargs: Any) -> queries.DatasetQueryResults: 

1400 """Query for and iterate over dataset references matching user-provided 

1401 criteria. 

1402 

1403 Parameters 

1404 ---------- 

1405 datasetType 

1406 An expression that fully or partially identifies the dataset types 

1407 to be queried. Allowed types include `DatasetType`, `str`, 

1408 `re.Pattern`, and iterables thereof. The special value `...` can 

1409 be used to query all dataset types. See 

1410 :ref:`daf_butler_dataset_type_expressions` for more information. 

1411 collections: optional 

1412 An expression that identifies the collections to search, such as a 

1413 `str` (for full matches), `re.Pattern` (for partial matches), or 

1414 iterable thereof. `...` can be used to search all collections 

1415 (actually just all `~CollectionType.RUN` collections, because this 

1416 will still find all datasets). If not provided, 

1417 ``self.default.collections`` is used. See 

1418 :ref:`daf_butler_collection_expressions` for more information. 

1419 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1420 Dimensions to include in the query (in addition to those used 

1421 to identify the queried dataset type(s)), either to constrain 

1422 the resulting datasets to those for which a matching dimension 

1423 exists, or to relate the dataset type's dimensions to dimensions 

1424 referenced by the ``dataId`` or ``where`` arguments. 

1425 dataId : `dict` or `DataCoordinate`, optional 

1426 A data ID whose key-value pairs are used as equality constraints 

1427 in the query. 

1428 where : `str`, optional 

1429 A string expression similar to a SQL WHERE clause. May involve 

1430 any column of a dimension table or (as a shortcut for the primary 

1431 key column of a dimension table) dimension name. See 

1432 :ref:`daf_butler_dimension_expressions` for more information. 

1433 findFirst : `bool`, optional 

1434 If `True` (`False` is default), for each result data ID, only 

1435 yield one `DatasetRef` of each `DatasetType`, from the first 

1436 collection in which a dataset of that dataset type appears 

1437 (according to the order of ``collections`` passed in). If `True`, 

1438 ``collections`` must not contain regular expressions and may not 

1439 be `...`. 

1440 components : `bool`, optional 

1441 If `True`, apply all dataset expression patterns to component 

1442 dataset type names as well. If `False`, never apply patterns to 

1443 components. If `None` (default), apply patterns to components only 

1444 if their parent datasets were not matched by the expression. 

1445 Fully-specified component datasets (`str` or `DatasetType` 

1446 instances) are always included. 

1447 bind : `Mapping`, optional 

1448 Mapping containing literal values that should be injected into the 

1449 ``where`` expression, keyed by the identifiers they replace. 

1450 check : `bool`, optional 

1451 If `True` (default) check the query for consistency before 

1452 executing it. This may reject some valid queries that resemble 

1453 common mistakes (e.g. queries for visits without specifying an 

1454 instrument). 

1455 **kwargs 

1456 Additional keyword arguments are forwarded to 

1457 `DataCoordinate.standardize` when processing the ``dataId`` 

1458 argument (and may be used to provide a constraining data ID even 

1459 when the ``dataId`` argument is `None`). 

1460 

1461 Returns 

1462 ------- 

1463 refs : `queries.DatasetQueryResults` 

1464 Dataset references matching the given query criteria. Nested data 

1465 IDs are guaranteed to include values for all implied dimensions 

1466 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1467 include dimension records (`DataCoordinate.hasRecords` will be 

1468 `False`) unless `~queries.DatasetQueryResults.expanded` is called 

1469 on the result object (which returns a new one). 

1470 

1471 Raises 

1472 ------ 

1473 TypeError 

1474 Raised when the arguments are incompatible, such as when a 

1475 collection wildcard is passed when ``findFirst`` is `True`, or 

1476 when ``collections`` is `None` and``self.defaults.collections`` is 

1477 also `None`. 

1478 

1479 Notes 

1480 ----- 

1481 When multiple dataset types are queried in a single call, the 

1482 results of this operation are equivalent to querying for each dataset 

1483 type separately in turn, and no information about the relationships 

1484 between datasets of different types is included. In contexts where 

1485 that kind of information is important, the recommended pattern is to 

1486 use `queryDataIds` to first obtain data IDs (possibly with the 

1487 desired dataset types and collections passed as constraints to the 

1488 query), and then use multiple (generally much simpler) calls to 

1489 `queryDatasets` with the returned data IDs passed as constraints. 

1490 """ 

1491 # Standardize the collections expression. 

1492 if collections is None: 

1493 if not self.defaults.collections: 

1494 raise TypeError("No collections provided to findDataset, " 

1495 "and no defaults from registry construction.") 

1496 collections = self.defaults.collections 

1497 elif findFirst: 

1498 collections = CollectionSearch.fromExpression(collections) 

1499 else: 

1500 collections = CollectionQuery.fromExpression(collections) 

1501 # Standardize and expand the data ID provided as a constraint. 

1502 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1503 

1504 # We can only query directly if given a non-component DatasetType 

1505 # instance. If we were given an expression or str or a component 

1506 # DatasetType instance, we'll populate this dict, recurse, and return. 

1507 # If we already have a non-component DatasetType, it will remain None 

1508 # and we'll run the query directly. 

1509 composition: Optional[ 

1510 Dict[ 

1511 DatasetType, # parent dataset type 

1512 List[Optional[str]] # component name, or None for parent 

1513 ] 

1514 ] = None 

1515 if not isinstance(datasetType, DatasetType): 

1516 # We were given a dataset type expression (which may be as simple 

1517 # as a str). Loop over all matching datasets, delegating handling 

1518 # of the `components` argument to queryDatasetTypes, as we populate 

1519 # the composition dict. 

1520 composition = defaultdict(list) 

1521 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1522 parentName, componentName = trueDatasetType.nameAndComponent() 

1523 if componentName is not None: 

1524 parentDatasetType = self.getDatasetType(parentName) 

1525 composition.setdefault(parentDatasetType, []).append(componentName) 

1526 else: 

1527 composition.setdefault(trueDatasetType, []).append(None) 

1528 elif datasetType.isComponent(): 

1529 # We were given a true DatasetType instance, but it's a component. 

1530 # the composition dict will have exactly one item. 

1531 parentName, componentName = datasetType.nameAndComponent() 

1532 parentDatasetType = self.getDatasetType(parentName) 

1533 composition = {parentDatasetType: [componentName]} 

1534 if composition is not None: 

1535 # We need to recurse. Do that once for each parent dataset type. 

1536 chain = [] 

1537 for parentDatasetType, componentNames in composition.items(): 

1538 parentResults = self.queryDatasets( 

1539 parentDatasetType, 

1540 collections=collections, 

1541 dimensions=dimensions, 

1542 dataId=standardizedDataId, 

1543 where=where, 

1544 findFirst=findFirst, 

1545 check=check, 

1546 ) 

1547 if isinstance(parentResults, queries.ParentDatasetQueryResults): 

1548 chain.append( 

1549 parentResults.withComponents(componentNames) 

1550 ) 

1551 else: 

1552 # Should only happen if we know there would be no results. 

1553 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \ 

1554 and not parentResults._chain 

1555 return queries.ChainedDatasetQueryResults(chain) 

1556 # If we get here, there's no need to recurse (or we are already 

1557 # recursing; there can only ever be one level of recursion). 

1558 

1559 # The full set of dimensions in the query is the combination of those 

1560 # needed for the DatasetType and those explicitly requested, if any. 

1561 requestedDimensionNames = set(datasetType.dimensions.names) 

1562 if dimensions is not None: 

1563 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1564 # Construct the summary structure needed to construct a QueryBuilder. 

1565 summary = queries.QuerySummary( 

1566 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1567 dataId=standardizedDataId, 

1568 expression=where, 

1569 bind=bind, 

1570 defaults=self.defaults.dataId, 

1571 check=check, 

1572 ) 

1573 builder = self.makeQueryBuilder(summary) 

1574 # Add the dataset subquery to the query, telling the QueryBuilder to 

1575 # include the rank of the selected collection in the results only if we 

1576 # need to findFirst. Note that if any of the collections are 

1577 # actually wildcard expressions, and we've asked for deduplication, 

1578 # this will raise TypeError for us. 

1579 if not builder.joinDataset(datasetType, collections, isResult=True, findFirst=findFirst): 

1580 return queries.ChainedDatasetQueryResults(()) 

1581 query = builder.finish() 

1582 return queries.ParentDatasetQueryResults(self._db, query, components=[None]) 

1583 

1584 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1585 dataId: Optional[DataId] = None, 

1586 datasets: Any = None, 

1587 collections: Any = None, 

1588 where: Optional[str] = None, 

1589 components: Optional[bool] = None, 

1590 bind: Optional[Mapping[str, Any]] = None, 

1591 check: bool = True, 

1592 **kwargs: Any) -> queries.DataCoordinateQueryResults: 

1593 """Query for data IDs matching user-provided criteria. 

1594 

1595 Parameters 

1596 ---------- 

1597 dimensions : `Dimension` or `str`, or iterable thereof 

1598 The dimensions of the data IDs to yield, as either `Dimension` 

1599 instances or `str`. Will be automatically expanded to a complete 

1600 `DimensionGraph`. 

1601 dataId : `dict` or `DataCoordinate`, optional 

1602 A data ID whose key-value pairs are used as equality constraints 

1603 in the query. 

1604 datasets : `Any`, optional 

1605 An expression that fully or partially identifies dataset types 

1606 that should constrain the yielded data IDs. For example, including 

1607 "raw" here would constrain the yielded ``instrument``, 

1608 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1609 those for which at least one "raw" dataset exists in 

1610 ``collections``. Allowed types include `DatasetType`, `str`, 

1611 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1612 expressions, ``...`` is not permitted - it doesn't make sense to 

1613 constrain data IDs on the existence of *all* datasets. 

1614 See :ref:`daf_butler_dataset_type_expressions` for more 

1615 information. 

1616 collections: `Any`, optional 

1617 An expression that identifies the collections to search for 

1618 datasets, such as a `str` (for full matches), `re.Pattern` (for 

1619 partial matches), or iterable thereof. `...` can be used to search 

1620 all collections (actually just all `~CollectionType.RUN` 

1621 collections, because this will still find all datasets). If not 

1622 provided, ``self.default.collections`` is used. Ignored unless 

1623 ``datasets`` is also passed. See 

1624 :ref:`daf_butler_collection_expressions` for more information. 

1625 where : `str`, optional 

1626 A string expression similar to a SQL WHERE clause. May involve 

1627 any column of a dimension table or (as a shortcut for the primary 

1628 key column of a dimension table) dimension name. See 

1629 :ref:`daf_butler_dimension_expressions` for more information. 

1630 components : `bool`, optional 

1631 If `True`, apply all dataset expression patterns to component 

1632 dataset type names as well. If `False`, never apply patterns to 

1633 components. If `None` (default), apply patterns to components only 

1634 if their parent datasets were not matched by the expression. 

1635 Fully-specified component datasets (`str` or `DatasetType` 

1636 instances) are always included. 

1637 bind : `Mapping`, optional 

1638 Mapping containing literal values that should be injected into the 

1639 ``where`` expression, keyed by the identifiers they replace. 

1640 check : `bool`, optional 

1641 If `True` (default) check the query for consistency before 

1642 executing it. This may reject some valid queries that resemble 

1643 common mistakes (e.g. queries for visits without specifying an 

1644 instrument). 

1645 **kwargs 

1646 Additional keyword arguments are forwarded to 

1647 `DataCoordinate.standardize` when processing the ``dataId`` 

1648 argument (and may be used to provide a constraining data ID even 

1649 when the ``dataId`` argument is `None`). 

1650 

1651 Returns 

1652 ------- 

1653 dataIds : `DataCoordinateQueryResults` 

1654 Data IDs matching the given query parameters. These are guaranteed 

1655 to identify all dimensions (`DataCoordinate.hasFull` returns 

1656 `True`), but will not contain `DimensionRecord` objects 

1657 (`DataCoordinate.hasRecords` returns `False`). Call 

1658 `DataCoordinateQueryResults.expanded` on the returned object to 

1659 fetch those (and consider using 

1660 `DataCoordinateQueryResults.materialize` on the returned object 

1661 first if the expected number of rows is very large). See 

1662 documentation for those methods for additional information. 

1663 

1664 Raises 

1665 ------ 

1666 TypeError 

1667 Raised if ``collections`` is `None`, ``self.defaults.collections`` 

1668 is `None`, and ``datasets`` is not `None`. 

1669 """ 

1670 dimensions = iterable(dimensions) 

1671 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1672 standardizedDatasetTypes = set() 

1673 requestedDimensions = self.dimensions.extract(dimensions) 

1674 queryDimensionNames = set(requestedDimensions.names) 

1675 if datasets is not None: 

1676 if collections is None: 

1677 if not self.defaults.collections: 

1678 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1679 collections = self.defaults.collections 

1680 else: 

1681 # Preprocess collections expression in case the original 

1682 # included single-pass iterators (we'll want to use it multiple 

1683 # times below). 

1684 collections = CollectionQuery.fromExpression(collections) 

1685 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1686 queryDimensionNames.update(datasetType.dimensions.names) 

1687 # If any matched dataset type is a component, just operate on 

1688 # its parent instead, because Registry doesn't know anything 

1689 # about what components exist, and here (unlike queryDatasets) 

1690 # we don't care about returning them. 

1691 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1692 if componentName is not None: 

1693 datasetType = self.getDatasetType(parentDatasetTypeName) 

1694 standardizedDatasetTypes.add(datasetType) 

1695 

1696 summary = queries.QuerySummary( 

1697 requested=DimensionGraph(self.dimensions, names=queryDimensionNames), 

1698 dataId=standardizedDataId, 

1699 expression=where, 

1700 bind=bind, 

1701 defaults=self.defaults.dataId, 

1702 check=check, 

1703 ) 

1704 builder = self.makeQueryBuilder(summary) 

1705 for datasetType in standardizedDatasetTypes: 

1706 builder.joinDataset(datasetType, collections, isResult=False) 

1707 query = builder.finish() 

1708 return queries.DataCoordinateQueryResults(self._db, query) 

1709 

1710 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1711 dataId: Optional[DataId] = None, 

1712 datasets: Any = None, 

1713 collections: Any = None, 

1714 where: Optional[str] = None, 

1715 components: Optional[bool] = None, 

1716 bind: Optional[Mapping[str, Any]] = None, 

1717 check: bool = True, 

1718 **kwargs: Any) -> Iterator[DimensionRecord]: 

1719 """Query for dimension information matching user-provided criteria. 

1720 

1721 Parameters 

1722 ---------- 

1723 element : `DimensionElement` or `str` 

1724 The dimension element to obtain records for. 

1725 dataId : `dict` or `DataCoordinate`, optional 

1726 A data ID whose key-value pairs are used as equality constraints 

1727 in the query. 

1728 datasets : `Any`, optional 

1729 An expression that fully or partially identifies dataset types 

1730 that should constrain the yielded records. See `queryDataIds` and 

1731 :ref:`daf_butler_dataset_type_expressions` for more information. 

1732 collections: `Any`, optional 

1733 An expression that identifies the collections to search for 

1734 datasets, such as a `str` (for full matches), `re.Pattern` (for 

1735 partial matches), or iterable thereof. `...` can be used to search 

1736 all collections (actually just all `~CollectionType.RUN` 

1737 collections, because this will still find all datasets). If not 

1738 provided, ``self.default.collections`` is used. Ignored unless 

1739 ``datasets`` is also passed. See 

1740 :ref:`daf_butler_collection_expressions` for more information. 

1741 where : `str`, optional 

1742 A string expression similar to a SQL WHERE clause. See 

1743 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1744 information. 

1745 components : `bool`, optional 

1746 Whether to apply dataset expressions to components as well. 

1747 See `queryDataIds` for more information. 

1748 bind : `Mapping`, optional 

1749 Mapping containing literal values that should be injected into the 

1750 ``where`` expression, keyed by the identifiers they replace. 

1751 check : `bool`, optional 

1752 If `True` (default) check the query for consistency before 

1753 executing it. This may reject some valid queries that resemble 

1754 common mistakes (e.g. queries for visits without specifying an 

1755 instrument). 

1756 **kwargs 

1757 Additional keyword arguments are forwarded to 

1758 `DataCoordinate.standardize` when processing the ``dataId`` 

1759 argument (and may be used to provide a constraining data ID even 

1760 when the ``dataId`` argument is `None`). 

1761 

1762 Returns 

1763 ------- 

1764 dataIds : `DataCoordinateQueryResults` 

1765 Data IDs matching the given query parameters. 

1766 """ 

1767 if not isinstance(element, DimensionElement): 

1768 try: 

1769 element = self.dimensions[element] 

1770 except KeyError as e: 

1771 raise KeyError(f"No such dimension '{element}', available dimensions: " 

1772 + str(self.dimensions.getStaticElements())) from e 

1773 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections, 

1774 where=where, components=components, bind=bind, check=check, **kwargs) 

1775 return iter(self._managers.dimensions[element].fetch(dataIds)) 

1776 

1777 def queryDatasetAssociations( 

1778 self, 

1779 datasetType: Union[str, DatasetType], 

1780 collections: Any = ..., 

1781 *, 

1782 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1783 flattenChains: bool = False, 

1784 ) -> Iterator[DatasetAssociation]: 

1785 """Iterate over dataset-collection combinations where the dataset is in 

1786 the collection. 

1787 

1788 This method is a temporary placeholder for better support for 

1789 assocation results in `queryDatasets`. It will probably be 

1790 removed in the future, and should be avoided in production code 

1791 whenever possible. 

1792 

1793 Parameters 

1794 ---------- 

1795 datasetType : `DatasetType` or `str` 

1796 A dataset type object or the name of one. 

1797 collections: `Any`, optional 

1798 An expression that identifies the collections to search for 

1799 datasets, such as a `str` (for full matches), `re.Pattern` (for 

1800 partial matches), or iterable thereof. `...` can be used to search 

1801 all collections (actually just all `~CollectionType.RUN` 

1802 collections, because this will still find all datasets). If not 

1803 provided, ``self.default.collections`` is used. See 

1804 :ref:`daf_butler_collection_expressions` for more information. 

1805 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1806 If provided, only yield associations from collections of these 

1807 types. 

1808 flattenChains : `bool`, optional 

1809 If `True` (default) search in the children of 

1810 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED`` 

1811 collections are ignored. 

1812 

1813 Yields 

1814 ------ 

1815 association : `DatasetAssociation` 

1816 Object representing the relationship beween a single dataset and 

1817 a single collection. 

1818 

1819 Raises 

1820 ------ 

1821 TypeError 

1822 Raised if ``collections`` is `None` and 

1823 ``self.defaults.collections`` is `None`. 

1824 """ 

1825 if collections is None: 

1826 if not self.defaults.collections: 

1827 raise TypeError("No collections provided to findDataset, " 

1828 "and no defaults from registry construction.") 

1829 collections = self.defaults.collections 

1830 else: 

1831 collections = CollectionQuery.fromExpression(collections) 

1832 TimespanReprClass = self._db.getTimespanRepresentation() 

1833 if isinstance(datasetType, str): 

1834 storage = self._managers.datasets[datasetType] 

1835 else: 

1836 storage = self._managers.datasets[datasetType.name] 

1837 for collectionRecord in collections.iter(self._managers.collections, 

1838 collectionTypes=frozenset(collectionTypes), 

1839 flattenChains=flattenChains): 

1840 query = storage.select(collectionRecord) 

1841 if query is None: 

1842 continue 

1843 for row in self._db.query(query.combine()): 

1844 dataId = DataCoordinate.fromRequiredValues( 

1845 storage.datasetType.dimensions, 

1846 tuple(row[name] for name in storage.datasetType.dimensions.required.names) 

1847 ) 

1848 runRecord = self._managers.collections[row[self._managers.collections.getRunForeignKeyName()]] 

1849 ref = DatasetRef(storage.datasetType, dataId, id=row["id"], run=runRecord.name, 

1850 conform=False) 

1851 if collectionRecord.type is CollectionType.CALIBRATION: 

1852 timespan = TimespanReprClass.extract(row) 

1853 else: 

1854 timespan = None 

1855 yield DatasetAssociation(ref=ref, collection=collectionRecord.name, timespan=timespan) 

1856 

1857 storageClasses: StorageClassFactory 

1858 """All storage classes known to the registry (`StorageClassFactory`). 

1859 """