Coverage for python/lsst/daf/butler/registry/_registry.py: 61%

178 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Registry",) 

25 

26import contextlib 

27import logging 

28from abc import ABC, abstractmethod 

29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, Type, Union 

30 

31from lsst.resources import ResourcePathExpression 

32from lsst.utils import doImportType 

33 

34from ..core import ( 

35 Config, 

36 DataCoordinate, 

37 DataCoordinateIterable, 

38 DataId, 

39 DatasetAssociation, 

40 DatasetId, 

41 DatasetRef, 

42 DatasetType, 

43 Dimension, 

44 DimensionConfig, 

45 DimensionElement, 

46 DimensionGraph, 

47 DimensionRecord, 

48 DimensionUniverse, 

49 NameLookupMapping, 

50 StorageClassFactory, 

51 Timespan, 

52) 

53from ._collectionType import CollectionType 

54from ._config import RegistryConfig 

55from ._defaults import RegistryDefaults 

56from .interfaces import DatasetIdGenEnum 

57from .summaries import CollectionSummary 

58from .wildcards import CollectionSearch 

59 

60if TYPE_CHECKING: 

61 from .._butlerConfig import ButlerConfig 

62 from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager 

63 

64_LOG = logging.getLogger(__name__) 

65 

66 

67class Registry(ABC): 

68 """Abstract Registry interface. 

69 

70 Each registry implementation can have its own constructor parameters. 

71 The assumption is that an instance of a specific subclass will be 

72 constructed from configuration using `Registry.fromConfig()`. 

73 The base class will look for a ``cls`` entry and call that specific 

74 `fromConfig()` method. 

75 

76 All subclasses should store `RegistryDefaults` in a ``_defaults`` 

77 property. No other properties are assumed shared between implementations. 

78 """ 

79 

80 defaultConfigFile: Optional[str] = None 

81 """Path to configuration defaults. Accessed within the ``configs`` resource 

82 or relative to a search path. Can be None if no defaults specified. 

83 """ 

84 

85 @classmethod 

86 def forceRegistryConfig( 

87 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]] 

88 ) -> RegistryConfig: 

89 """Force the supplied config to a `RegistryConfig`. 

90 

91 Parameters 

92 ---------- 

93 config : `RegistryConfig`, `Config` or `str` or `None` 

94 Registry configuration. If missing, the default configuration will 

95 be loaded from registry.yaml. 

96 

97 Returns 

98 ------- 

99 registry_config : `RegistryConfig` 

100 A registry config. 

101 """ 

102 if not isinstance(config, RegistryConfig): 

103 if isinstance(config, (str, Config)) or config is None: 

104 config = RegistryConfig(config) 

105 else: 

106 raise ValueError(f"Incompatible Registry configuration: {config}") 

107 return config 

108 

109 @classmethod 

110 def determineTrampoline( 

111 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]] 

112 ) -> Tuple[Type[Registry], RegistryConfig]: 

113 """Return class to use to instantiate real registry. 

114 

115 Parameters 

116 ---------- 

117 config : `RegistryConfig` or `str`, optional 

118 Registry configuration. If missing, the default configuration will 

119 be loaded from registry.yaml. 

120 

121 Returns 

122 ------- 

123 requested_cls : `type` of `Registry` 

124 The real registry class to use. 

125 registry_config : `RegistryConfig` 

126 The `RegistryConfig` to use. 

127 """ 

128 config = cls.forceRegistryConfig(config) 

129 

130 # Default to the standard registry 

131 registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") 

132 registry_cls = doImportType(registry_cls_name) 

133 if registry_cls is cls: 

134 raise ValueError("Can not instantiate the abstract base Registry from config") 

135 if not issubclass(registry_cls, Registry): 

136 raise TypeError( 

137 f"Registry class obtained from config {registry_cls_name} is not a Registry class." 

138 ) 

139 return registry_cls, config 

140 

141 @classmethod 

142 def createFromConfig( 

143 cls, 

144 config: Optional[Union[RegistryConfig, str]] = None, 

145 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

146 butlerRoot: Optional[ResourcePathExpression] = None, 

147 ) -> Registry: 

148 """Create registry database and return `Registry` instance. 

149 

150 This method initializes database contents; the database must be empty 

151 prior to calling this method. 

152 

153 Parameters 

154 ---------- 

155 config : `RegistryConfig` or `str`, optional 

156 Registry configuration. If missing, the default configuration will 

157 be loaded from registry.yaml. 

158 dimensionConfig : `DimensionConfig` or `str`, optional 

159 Dimensions configuration. If missing, the default configuration 

160 will be loaded from dimensions.yaml. 

161 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

162 Path to the repository root this `Registry` will manage. 

163 

164 Returns 

165 ------- 

166 registry : `Registry` 

167 A new `Registry` instance. 

168 

169 Notes 

170 ----- 

171 This class will determine the concrete `Registry` subclass to 

172 use from configuration. Each subclass should implement this method 

173 even if it can not create a registry. 

174 """ 

175 registry_cls, registry_config = cls.determineTrampoline(config) 

176 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) 

177 

178 @classmethod 

179 def fromConfig( 

180 cls, 

181 config: Union[ButlerConfig, RegistryConfig, Config, str], 

182 butlerRoot: Optional[ResourcePathExpression] = None, 

183 writeable: bool = True, 

184 defaults: Optional[RegistryDefaults] = None, 

185 ) -> Registry: 

186 """Create `Registry` subclass instance from `config`. 

187 

188 Registry database must be initialized prior to calling this method. 

189 

190 Parameters 

191 ---------- 

192 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

193 Registry configuration. 

194 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

195 Path to the repository root this `Registry` will manage. 

196 writeable : `bool`, optional 

197 If `True` (default) create a read-write connection to the database. 

198 defaults : `RegistryDefaults`, optional 

199 Default collection search path and/or output `~CollectionType.RUN` 

200 collection. 

201 

202 Returns 

203 ------- 

204 registry : `Registry` (subclass) 

205 A new `Registry` subclass instance. 

206 

207 Notes 

208 ----- 

209 This class will determine the concrete `Registry` subclass to 

210 use from configuration. Each subclass should implement this method. 

211 """ 

212 # The base class implementation should trampoline to the correct 

213 # subclass. No implementation should ever use this implementation 

214 # directly. If no class is specified, default to the standard 

215 # registry. 

216 registry_cls, registry_config = cls.determineTrampoline(config) 

217 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) 
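
    # Illustrative sketch (not part of the original file): how the trampoline is
    # typically exercised.  The repository path and database URL are hypothetical;
    # a config without a "cls" entry falls back to the default SqlRegistry via
    # determineTrampoline().
    #
    #     from lsst.daf.butler.registry import Registry, RegistryConfig
    #
    #     config = RegistryConfig({"db": "sqlite:///example_repo/gen3.sqlite3"})
    #     registry = Registry.fromConfig(config, butlerRoot="example_repo",
    #                                    writeable=False)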

218 

219 @abstractmethod 

220 def isWriteable(self) -> bool: 

221 """Return `True` if this registry allows write operations, and `False` 

222 otherwise. 

223 """ 

224 raise NotImplementedError() 

225 

226 @abstractmethod 

227 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

228 """Create a new `Registry` backed by the same data repository and 

229 connection as this one, but independent defaults. 

230 

231 Parameters 

232 ---------- 

233 defaults : `RegistryDefaults`, optional 

234 Default collections and data ID values for the new registry. If 

235 not provided, ``self.defaults`` will be used (but future changes 

236 to either registry's defaults will not affect the other). 

237 

238 Returns 

239 ------- 

240 copy : `Registry` 

241 A new `Registry` instance with its own defaults. 

242 

243 Notes 

244 ----- 

245 Because the new registry shares a connection with the original, they 

246 also share transaction state (despite the fact that their `transaction` 

247 context manager methods do not reflect this), and must be used with 

248 care. 

249 """ 

250 raise NotImplementedError() 

251 

252 @property 

253 @abstractmethod 

254 def dimensions(self) -> DimensionUniverse: 

255 """Definitions of all dimensions recognized by this `Registry` 

256 (`DimensionUniverse`). 

257 """ 

258 raise NotImplementedError() 

259 

260 @property 

261 def defaults(self) -> RegistryDefaults: 

262 """Default collection search path and/or output `~CollectionType.RUN` 

263 collection (`RegistryDefaults`). 

264 

265 This is an immutable struct whose components may not be set 

266 individually, but the entire struct can be set by assigning to this 

267 property. 

268 """ 

269 return self._defaults 

270 

271 @defaults.setter 

272 def defaults(self, value: RegistryDefaults) -> None: 

273 if value.run is not None: 

274 self.registerRun(value.run) 

275 value.finish(self) 

276 self._defaults = value 
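
    # Usage sketch (illustrative only): the defaults struct is replaced as a
    # whole, as described above.  The collection, run, and instrument names are
    # hypothetical.
    #
    #     from lsst.daf.butler.registry import RegistryDefaults
    #
    #     registry.defaults = RegistryDefaults(
    #         collections=["HSC/defaults"], run="u/someone/run", instrument="HSC"
    #     )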

277 

278 @abstractmethod 

279 def refresh(self) -> None: 

280 """Refresh all in-memory state by querying the database. 

281 

282 This may be necessary to enable querying for entities added by other 

283 registry instances after this one was constructed. 

284 """ 

285 raise NotImplementedError() 

286 

287 @contextlib.contextmanager 

288 @abstractmethod 

289 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

290 """Return a context manager that represents a transaction.""" 

291 raise NotImplementedError() 

292 

293 def resetConnectionPool(self) -> None: 

294 """Reset connection pool for registry if relevant. 

295 

296 This operation can be used to reset connections to servers when 

297 using the registry with fork-based multiprocessing. This method 

298 should usually be called by the child process immediately 

299 after the fork. 

300 

301 The base class implementation is a no-op. 

302 """ 

303 pass 

304 

305 @abstractmethod 

306 def registerCollection( 

307 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

308 ) -> bool: 

309 """Add a new collection if one with the given name does not exist. 

310 

311 Parameters 

312 ---------- 

313 name : `str` 

314 The name of the collection to create. 

315 type : `CollectionType` 

316 Enum value indicating the type of collection to create. 

317 doc : `str`, optional 

318 Documentation string for the collection. 

319 

320 Returns 

321 ------- 

322 registered : `bool` 

323 Boolean indicating whether the collection was created by this 

324 call (`True`) or already existed before it (`False`). 

325 

326 Notes 

327 ----- 

328 This method cannot be called within transactions, as it needs to be 

329 able to perform its own transaction to be concurrent. 

330 """ 

331 raise NotImplementedError() 
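
    # Usage sketch (illustrative; the collection name is hypothetical):
    #
    #     from lsst.daf.butler import CollectionType
    #
    #     created = registry.registerCollection(
    #         "u/someone/tagged-selection",
    #         CollectionType.TAGGED,
    #         doc="Hand-picked datasets for QA",
    #     )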

332 

333 @abstractmethod 

334 def getCollectionType(self, name: str) -> CollectionType: 

335 """Return an enumeration value indicating the type of the given 

336 collection. 

337 

338 Parameters 

339 ---------- 

340 name : `str` 

341 The name of the collection. 

342 

343 Returns 

344 ------- 

345 type : `CollectionType` 

346 Enum value indicating the type of this collection. 

347 

348 Raises 

349 ------ 

350 MissingCollectionError 

351 Raised if no collection with the given name exists. 

352 """ 

353 raise NotImplementedError() 

354 

355 @abstractmethod 

356 def _get_collection_record(self, name: str) -> CollectionRecord: 

357 """Return the record for this collection. 

358 

359 Parameters 

360 ---------- 

361 name : `str` 

362 Name of the collection for which the record is to be retrieved. 

363 

364 Returns 

365 ------- 

366 record : `CollectionRecord` 

367 The record for this collection. 

368 """ 

369 raise NotImplementedError() 

370 

371 @abstractmethod 

372 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

373 """Add a new run if one with the given name does not exist. 

374 

375 Parameters 

376 ---------- 

377 name : `str` 

378 The name of the run to create. 

379 doc : `str`, optional 

380 Documentation string for the collection. 

381 

382 Returns 

383 ------- 

384 registered : `bool` 

385 Boolean indicating whether a new run was registered. `False` 

386 if it already existed. 

387 

388 Notes 

389 ----- 

390 This method cannot be called within transactions, as it needs to be 

391 able to perform its own transaction to be concurrent. 

392 """ 

393 raise NotImplementedError() 

394 

395 @abstractmethod 

396 def removeCollection(self, name: str) -> None: 

397 """Remove the given collection from the registry. 

398 

399 Parameters 

400 ---------- 

401 name : `str` 

402 The name of the collection to remove. 

403 

404 Raises 

405 ------ 

406 MissingCollectionError 

407 Raised if no collection with the given name exists. 

408 sqlalchemy.IntegrityError 

409 Raised if the database rows associated with the collection are 

410 still referenced by some other table, such as a dataset in a 

411 datastore (for `~CollectionType.RUN` collections only) or a 

412 `~CollectionType.CHAINED` collection of which this collection is 

413 a child. 

414 

415 Notes 

416 ----- 

417 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

418 in it will be removed from the `Registry` database. This requires that 

419 those datasets be removed (or at least trashed) from any datastores 

420 that hold them first. 

421 

422 A collection may not be deleted as long as it is referenced by a 

423 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

424 be deleted or redefined first. 

425 """ 

426 raise NotImplementedError() 

427 

428 @abstractmethod 

429 def getCollectionChain(self, parent: str) -> CollectionSearch: 

430 """Return the child collections in a `~CollectionType.CHAINED` 

431 collection. 

432 

433 Parameters 

434 ---------- 

435 parent : `str` 

436 Name of the chained collection. Must have already been added via 

437 a call to `Registry.registerCollection`. 

438 

439 Returns 

440 ------- 

441 children : `CollectionSearch` 

442 An object that defines the search path of the collection. 

443 See :ref:`daf_butler_collection_expressions` for more information. 

444 

445 Raises 

446 ------ 

447 MissingCollectionError 

448 Raised if ``parent`` does not exist in the `Registry`. 

449 TypeError 

450 Raised if ``parent`` does not correspond to a 

451 `~CollectionType.CHAINED` collection. 

452 """ 

453 raise NotImplementedError() 

454 

455 @abstractmethod 

456 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

457 """Define or redefine a `~CollectionType.CHAINED` collection. 

458 

459 Parameters 

460 ---------- 

461 parent : `str` 

462 Name of the chained collection. Must have already been added via 

463 a call to `Registry.registerCollection`. 

464 children : `Any` 

465 An expression defining an ordered search of child collections, 

466 generally an iterable of `str`; see 

467 :ref:`daf_butler_collection_expressions` for more information. 

468 flatten : `bool`, optional 

469 If `True` (`False` is default), recursively flatten out any nested 

470 `~CollectionType.CHAINED` collections in ``children`` first. 

471 

472 Raises 

473 ------ 

474 MissingCollectionError 

475 Raised when any of the given collections do not exist in the 

476 `Registry`. 

477 TypeError 

478 Raised if ``parent`` does not correspond to a 

479 `~CollectionType.CHAINED` collection. 

480 ValueError 

481 Raised if the given collections contains a cycle. 

482 """ 

483 raise NotImplementedError() 
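
    # Usage sketch (illustrative; all collection names are hypothetical):
    # register a CHAINED parent, then define its ordered search path.
    #
    #     from lsst.daf.butler import CollectionType
    #
    #     registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain(
    #         "HSC/defaults", ["HSC/raw/all", "HSC/calib", "skymaps"]
    #     )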

484 

485 @abstractmethod 

486 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

487 """Retrieve the documentation string for a collection. 

488 

489 Parameters 

490 ---------- 

491 collection : `str` 

492 Name of the collection. 

493 

494 Returns 

495 ------- 

496 docs : `str` or `None` 

497 Docstring for the collection with the given name. 

498 """ 

499 raise NotImplementedError() 

500 

501 @abstractmethod 

502 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

503 """Set the documentation string for a collection. 

504 

505 Parameters 

506 ---------- 

507 collection : `str` 

508 Name of the collection. 

509 doc : `str` or `None` 

510 Docstring for the collection with the given name; will replace any 

511 existing docstring. Passing `None` will remove any existing 

512 docstring. 

513 """ 

514 raise NotImplementedError() 

515 

516 @abstractmethod 

517 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

518 """Return a summary for the given collection. 

519 

520 Parameters 

521 ---------- 

522 collection : `str` 

523 Name of the collection for which a summary is to be retrieved. 

524 

525 Returns 

526 ------- 

527 summary : `CollectionSummary` 

528 Summary of the dataset types and governor dimension values in 

529 this collection. 

530 """ 

531 raise NotImplementedError() 

532 

533 @abstractmethod 

534 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

535 """ 

536 Add a new `DatasetType` to the Registry. 

537 

538 It is not an error to register the same `DatasetType` twice. 

539 

540 Parameters 

541 ---------- 

542 datasetType : `DatasetType` 

543 The `DatasetType` to be added. 

544 

545 Returns 

546 ------- 

547 inserted : `bool` 

548 `True` if ``datasetType`` was inserted, `False` if an identical 

549 existing `DatasetType` was found. Note that in either case the 

550 DatasetType is guaranteed to be defined in the Registry 

551 consistently with the given definition. 

552 

553 Raises 

554 ------ 

555 ValueError 

556 Raised if the dimensions or storage class are invalid. 

557 ConflictingDefinitionError 

558 Raised if this DatasetType is already registered with a different 

559 definition. 

560 

561 Notes 

562 ----- 

563 This method cannot be called within transactions, as it needs to be 

564 able to perform its own transaction to be concurrent. 

565 """ 

566 raise NotImplementedError() 

567 

568 @abstractmethod 

569 def removeDatasetType(self, name: str) -> None: 

570 """Remove the named `DatasetType` from the registry. 

571 

572 .. warning:: 

573 

574 Registry implementations can cache the dataset type definitions. 

575 This means that deleting the dataset type definition may result in 

576 unexpected behavior from other butler processes that are active 

577 that have not seen the deletion. 

578 

579 Parameters 

580 ---------- 

581 name : `str` 

582 Name of the type to be removed. 

583 

584 Raises 

585 ------ 

586 lsst.daf.butler.registry.OrphanedRecordError 

587 Raised if an attempt is made to remove the dataset type definition 

588 when there are already datasets associated with it. 

589 

590 Notes 

591 ----- 

592 If the dataset type is not registered the method will return without 

593 action. 

594 """ 

595 raise NotImplementedError() 

596 

597 @abstractmethod 

598 def getDatasetType(self, name: str) -> DatasetType: 

599 """Get the `DatasetType`. 

600 

601 Parameters 

602 ---------- 

603 name : `str` 

604 Name of the type. 

605 

606 Returns 

607 ------- 

608 type : `DatasetType` 

609 The `DatasetType` associated with the given name. 

610 

611 Raises 

612 ------ 

613 KeyError 

614 Raised if the requested named DatasetType could not be found in registry. 

615 """ 

616 raise NotImplementedError() 

617 

618 @abstractmethod 

619 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

620 """Test whether the given dataset ID generation mode is supported by 

621 `insertDatasets`. 

622 

623 Parameters 

624 ---------- 

625 mode : `DatasetIdGenEnum` 

626 Enum value for the mode to test. 

627 

628 Returns 

629 ------- 

630 supported : `bool` 

631 Whether the given mode is supported. 

632 """ 

633 raise NotImplementedError() 

634 

635 @abstractmethod 

636 def findDataset( 

637 self, 

638 datasetType: Union[DatasetType, str], 

639 dataId: Optional[DataId] = None, 

640 *, 

641 collections: Any = None, 

642 timespan: Optional[Timespan] = None, 

643 **kwargs: Any, 

644 ) -> Optional[DatasetRef]: 

645 """Find a dataset given its `DatasetType` and data ID. 

646 

647 This can be used to obtain a `DatasetRef` that permits the dataset to 

648 be read from a `Datastore`. If the dataset is a component and can not 

649 be found using the provided dataset type, a dataset ref for the parent 

650 will be returned instead but with the correct dataset type. 

651 

652 Parameters 

653 ---------- 

654 datasetType : `DatasetType` or `str` 

655 A `DatasetType` or the name of one. 

656 dataId : `dict` or `DataCoordinate`, optional 

657 A `dict`-like object containing the `Dimension` links that identify 

658 the dataset within a collection. 

659 collections : `Any`, optional 

660 An expression that fully or partially identifies the collections to 

661 search for the dataset; see 

662 :ref:`daf_butler_collection_expressions` for more information. 

663 Defaults to ``self.defaults.collections``. 

664 timespan : `Timespan`, optional 

665 A timespan that the validity range of the dataset must overlap. 

666 If not provided, any `~CollectionType.CALIBRATION` collections 

667 matched by the ``collections`` argument will not be searched. 

668 **kwargs 

669 Additional keyword arguments passed to 

670 `DataCoordinate.standardize` to convert ``dataId`` to a true 

671 `DataCoordinate` or augment an existing one. 

672 

673 Returns 

674 ------- 

675 ref : `DatasetRef` 

676 A reference to the dataset, or `None` if no matching Dataset 

677 was found. 

678 

679 Raises 

680 ------ 

681 TypeError 

682 Raised if ``collections`` is `None` and 

683 ``self.defaults.collections`` is `None`. 

684 LookupError 

685 Raised if one or more data ID keys are missing. 

686 KeyError 

687 Raised if the dataset type does not exist. 

688 MissingCollectionError 

689 Raised if any of ``collections`` does not exist in the registry. 

690 

691 Notes 

692 ----- 

693 This method simply returns `None` and does not raise an exception even 

694 when the set of collections searched is intrinsically incompatible with 

695 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

696 only `~CollectionType.CALIBRATION` collections are being searched. 

697 This may make it harder to debug some lookup failures, but the behavior 

698 is intentional; we consider it more important that failed searches are 

699 reported consistently, regardless of the reason, and that adding 

700 additional collections that do not contain a match to the search path 

701 never changes the behavior. 

702 """ 

703 raise NotImplementedError() 
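
    # Usage sketch (illustrative): look up a single dataset by data ID.  The
    # dataset type, collection, and data ID values are hypothetical.
    #
    #     ref = registry.findDataset(
    #         "calexp",
    #         instrument="HSC", visit=903334, detector=10,
    #         collections="HSC/runs/example",
    #     )
    #     if ref is None:
    #         print("no matching dataset found")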

704 

705 @abstractmethod 

706 def insertDatasets( 

707 self, 

708 datasetType: Union[DatasetType, str], 

709 dataIds: Iterable[DataId], 

710 run: Optional[str] = None, 

711 expand: bool = True, 

712 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

713 ) -> List[DatasetRef]: 

714 """Insert one or more datasets into the `Registry` 

715 

716 This always adds new datasets; to associate existing datasets with 

717 a new collection, use ``associate``. 

718 

719 Parameters 

720 ---------- 

721 datasetType : `DatasetType` or `str` 

722 A `DatasetType` or the name of one. 

723 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

724 Dimension-based identifiers for the new datasets. 

725 run : `str`, optional 

726 The name of the run that produced the datasets. Defaults to 

727 ``self.defaults.run``. 

728 expand : `bool`, optional 

729 If `True` (default), expand data IDs as they are inserted. This is 

730 necessary in general to allow datastore to generate file templates, 

731 but it may be disabled if the caller can guarantee this is 

732 unnecessary. 

733 idGenerationMode : `DatasetIdGenEnum`, optional 

734 Specifies option for generating dataset IDs. By default unique IDs 

735 are generated for each inserted dataset. 

736 

737 Returns 

738 ------- 

739 refs : `list` of `DatasetRef` 

740 Resolved `DatasetRef` instances for all given data IDs (in the same 

741 order). 

742 

743 Raises 

744 ------ 

745 TypeError 

746 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

747 ConflictingDefinitionError 

748 If a dataset with the same dataset type and data ID as one of those 

749 given already exists in ``run``. 

750 MissingCollectionError 

751 Raised if ``run`` does not exist in the registry. 

752 """ 

753 raise NotImplementedError() 
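
    # Usage sketch (illustrative): register a RUN collection and insert two
    # datasets of an already-registered dataset type.  All names and data ID
    # values are hypothetical.
    #
    #     registry.registerRun("u/someone/ingest-run")
    #     refs = registry.insertDatasets(
    #         "raw",
    #         dataIds=[
    #             {"instrument": "HSC", "exposure": 903334, "detector": 10},
    #             {"instrument": "HSC", "exposure": 903334, "detector": 11},
    #         ],
    #         run="u/someone/ingest-run",
    #     )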

754 

755 @abstractmethod 

756 def _importDatasets( 

757 self, 

758 datasets: Iterable[DatasetRef], 

759 expand: bool = True, 

760 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

761 reuseIds: bool = False, 

762 ) -> List[DatasetRef]: 

763 """Import one or more datasets into the `Registry`. 

764 

765 The difference from the `insertDatasets` method is that this method 

766 accepts `DatasetRef` instances which should already be resolved and 

767 have a dataset ID. If the registry supports globally-unique dataset 

768 IDs (e.g. `uuid.UUID`), then datasets which already exist in the 

769 registry will be ignored if imported again. 

770 

771 Parameters 

772 ---------- 

773 datasets : `~collections.abc.Iterable` of `DatasetRef` 

774 Datasets to be inserted. All `DatasetRef` instances must have 

775 identical ``datasetType`` and ``run`` attributes. ``run`` 

776 attribute can be `None` and defaults to ``self.defaults.run``. 

777 Datasets can specify the ``id`` attribute, which will be used for 

778 inserted datasets. All dataset IDs must have the same type 

779 (`int` or `uuid.UUID`); if the type of the dataset IDs does not match 

780 the configured backend, then the IDs will be ignored and new IDs will 

781 be generated by the backend. 

782 expand : `bool`, optional 

783 If `True` (default), expand data IDs as they are inserted. This is 

784 necessary in general to allow datastore to generate file templates, 

785 but it may be disabled if the caller can guarantee this is 

786 unnecessary. 

787 idGenerationMode : `DatasetIdGenEnum`, optional 

788 Specifies option for generating dataset IDs when IDs are not 

789 provided or their type does not match backend type. By default 

790 unique IDs are generated for each inserted dataset. 

791 reuseIds : `bool`, optional 

792 If `True` then forces re-use of imported dataset IDs for integer 

793 IDs which are normally generated as auto-incremented; exception 

794 will be raised if imported IDs clash with existing ones. This 

795 option has no effect on the use of globally-unique IDs which are 

796 always re-used (or generated if integer IDs are being imported). 

797 

798 Returns 

799 ------- 

800 refs : `list` of `DatasetRef` 

801 Resolved `DatasetRef` instances for all given data IDs (in the same 

802 order). If any of ``datasets`` has an ID which already exists in 

803 the database then it will not be inserted or updated, but a 

804 resolved `DatasetRef` will be returned for it in any case. 

805 

806 Raises 

807 ------ 

808 TypeError 

809 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

810 ConflictingDefinitionError 

811 If a dataset with the same dataset type and data ID as one of those 

812 given already exists in ``run``. 

813 MissingCollectionError 

814 Raised if ``run`` does not exist in the registry. 

815 

816 Notes 

817 ----- 

818 This method is considered package-private and internal to the Butler 

819 implementation. Clients outside the daf_butler package should not use 

820 method. 

821 """ 

822 raise NotImplementedError() 

823 

824 @abstractmethod 

825 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

826 """Retrieve a Dataset entry. 

827 

828 Parameters 

829 ---------- 

830 id : `DatasetId` 

831 The unique identifier for the dataset. 

832 

833 Returns 

834 ------- 

835 ref : `DatasetRef` or `None` 

836 A ref to the Dataset, or `None` if no matching Dataset 

837 was found. 

838 """ 

839 raise NotImplementedError() 

840 

841 @abstractmethod 

842 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

843 """Remove datasets from the Registry. 

844 

845 The datasets will be removed unconditionally from all collections, and 

846 any `Quantum` that consumed this dataset will instead be marked as 

847 having a NULL input. `Datastore` records will *not* be deleted; the 

848 caller is responsible for ensuring that the dataset has already been 

849 removed from all Datastores. 

850 

851 Parameters 

852 ---------- 

853 refs : `Iterable` of `DatasetRef` 

854 References to the datasets to be removed. Must include a valid 

855 ``id`` attribute, and should be considered invalidated upon return. 

856 

857 Raises 

858 ------ 

859 AmbiguousDatasetError 

860 Raised if any ``ref.id`` is `None`. 

861 OrphanedRecordError 

862 Raised if any dataset is still present in any `Datastore`. 

863 """ 

864 raise NotImplementedError() 

865 

866 @abstractmethod 

867 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

868 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

869 

870 If a `DatasetRef` with the same exact ID is already in the collection, 

871 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

872 data ID but with different ID exists in the collection, 

873 `ConflictingDefinitionError` is raised. 

874 

875 Parameters 

876 ---------- 

877 collection : `str` 

878 Indicates the collection the datasets should be associated with. 

879 refs : `Iterable` [ `DatasetRef` ] 

880 An iterable of resolved `DatasetRef` instances that already exist 

881 in this `Registry`. 

882 

883 Raises 

884 ------ 

885 ConflictingDefinitionError 

886 If a Dataset with the given `DatasetRef` already exists in the 

887 given collection. 

888 AmbiguousDatasetError 

889 Raised if ``any(ref.id is None for ref in refs)``. 

890 MissingCollectionError 

891 Raised if ``collection`` does not exist in the registry. 

892 TypeError 

893 Raised if adding new datasets to the given ``collection`` is not 

894 allowed. 

895 """ 

896 raise NotImplementedError() 

897 

898 @abstractmethod 

899 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

900 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

901 

902 ``collection`` and ``ref`` combinations that are not currently 

903 associated are silently ignored. 

904 

905 Parameters 

906 ---------- 

907 collection : `str` 

908 The collection the datasets should no longer be associated with. 

909 refs : `Iterable` [ `DatasetRef` ] 

910 An iterable of resolved `DatasetRef` instances that already exist 

911 in this `Registry`. 

912 

913 Raises 

914 ------ 

915 AmbiguousDatasetError 

916 Raised if any of the given dataset references is unresolved. 

917 MissingCollectionError 

918 Raised if ``collection`` does not exist in the registry. 

919 TypeError 

920 Raised if adding new datasets to the given ``collection`` is not 

921 allowed. 

922 """ 

923 raise NotImplementedError() 

924 

925 @abstractmethod 

926 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

927 """Associate one or more datasets with a calibration collection and a 

928 validity range within it. 

929 

930 Parameters 

931 ---------- 

932 collection : `str` 

933 The name of an already-registered `~CollectionType.CALIBRATION` 

934 collection. 

935 refs : `Iterable` [ `DatasetRef` ] 

936 Datasets to be associated. 

937 timespan : `Timespan` 

938 The validity range for these datasets within the collection. 

939 

940 Raises 

941 ------ 

942 AmbiguousDatasetError 

943 Raised if any of the given `DatasetRef` instances is unresolved. 

944 ConflictingDefinitionError 

945 Raised if the collection already contains a different dataset with 

946 the same `DatasetType` and data ID and an overlapping validity 

947 range. 

948 TypeError 

949 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

950 collection or if one or more datasets are of a dataset type for 

951 which `DatasetType.isCalibration` returns `False`. 

952 """ 

953 raise NotImplementedError() 
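
    # Usage sketch (illustrative): certify already-registered bias datasets
    # into a CALIBRATION collection for a validity range.  The collection name
    # and times are hypothetical, ``bias_refs`` is assumed to be an iterable of
    # resolved `DatasetRef` instances, and `Timespan` is assumed to accept
    # astropy time bounds.
    #
    #     import astropy.time
    #     from lsst.daf.butler import CollectionType, Timespan
    #
    #     registry.registerCollection("HSC/calib/example", CollectionType.CALIBRATION)
    #     span = Timespan(
    #         begin=astropy.time.Time("2021-01-01", scale="tai"),
    #         end=astropy.time.Time("2021-07-01", scale="tai"),
    #     )
    #     registry.certify("HSC/calib/example", bias_refs, span)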

954 

955 @abstractmethod 

956 def decertify( 

957 self, 

958 collection: str, 

959 datasetType: Union[str, DatasetType], 

960 timespan: Timespan, 

961 *, 

962 dataIds: Optional[Iterable[DataId]] = None, 

963 ) -> None: 

964 """Remove or adjust datasets to clear a validity range within a 

965 calibration collection. 

966 

967 Parameters 

968 ---------- 

969 collection : `str` 

970 The name of an already-registered `~CollectionType.CALIBRATION` 

971 collection. 

972 datasetType : `str` or `DatasetType` 

973 Name or `DatasetType` instance for the datasets to be decertified. 

974 timespan : `Timespan` 

975 The validity range to remove datasets from within the collection. 

976 Datasets that overlap this range but are not contained by it will 

977 have their validity ranges adjusted to not overlap it, which may 

978 split a single dataset validity range into two. 

979 dataIds : `Iterable` [ `DataId` ], optional 

980 Data IDs that should be decertified within the given validity range. 

981 If `None`, all data IDs for ``datasetType`` will be 

982 decertified. 

983 

984 Raises 

985 ------ 

986 TypeError 

987 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

988 collection or if ``datasetType.isCalibration() is False``. 

989 """ 

990 raise NotImplementedError() 

991 

992 @abstractmethod 

993 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

994 """Return an object that allows a new `Datastore` instance to 

995 communicate with this `Registry`. 

996 

997 Returns 

998 ------- 

999 manager : `DatastoreRegistryBridgeManager` 

1000 Object that mediates communication between this `Registry` and its 

1001 associated datastores. 

1002 """ 

1003 raise NotImplementedError() 

1004 

1005 @abstractmethod 

1006 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1007 """Retrieve datastore locations for a given dataset. 

1008 

1009 Parameters 

1010 ---------- 

1011 ref : `DatasetRef` 

1012 A reference to the dataset for which to retrieve storage 

1013 information. 

1014 

1015 Returns 

1016 ------- 

1017 datastores : `Iterable` [ `str` ] 

1018 All the matching datastores holding this dataset. 

1019 

1020 Raises 

1021 ------ 

1022 AmbiguousDatasetError 

1023 Raised if ``ref.id`` is `None`. 

1024 """ 

1025 raise NotImplementedError() 

1026 

1027 @abstractmethod 

1028 def expandDataId( 

1029 self, 

1030 dataId: Optional[DataId] = None, 

1031 *, 

1032 graph: Optional[DimensionGraph] = None, 

1033 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

1034 withDefaults: bool = True, 

1035 **kwargs: Any, 

1036 ) -> DataCoordinate: 

1037 """Expand a dimension-based data ID to include additional information. 

1038 

1039 Parameters 

1040 ---------- 

1041 dataId : `DataCoordinate` or `dict`, optional 

1042 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1043 graph : `DimensionGraph`, optional 

1044 Set of dimensions for the expanded ID. If `None`, the dimensions 

1045 will be inferred from the keys of ``dataId`` and ``kwargs``. 

1046 Dimensions that are in ``dataId`` or ``kwargs`` but not in 

1047 ``graph`` are silently ignored, providing a way to extract and 

1048 expand a subset of a data ID. 

1049 records : `Mapping` [`str`, `DimensionRecord`], optional 

1050 Dimension record data to use before querying the database for that 

1051 data, keyed by element name. 

1052 withDefaults : `bool`, optional 

1053 Utilize ``self.defaults.dataId`` to fill in missing governor 

1054 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1055 used). 

1056 **kwargs 

1057 Additional keywords are treated like additional key-value pairs for 

1058 ``dataId``, extending and overriding 

1059 

1060 Returns 

1061 ------- 

1062 expanded : `DataCoordinate` 

1063 A data ID that includes full metadata for all of the dimensions it 

1064 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1065 ``expanded.hasFull()`` both return `True`. 

1066 """ 

1067 raise NotImplementedError() 
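
    # Usage sketch (illustrative; data ID values are hypothetical):
    #
    #     data_id = registry.expandDataId(instrument="HSC", exposure=903334)
    #     assert data_id.hasFull() and data_id.hasRecords()
    #     print(data_id.records["exposure"].timespan)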

1068 

1069 @abstractmethod 

1070 def insertDimensionData( 

1071 self, 

1072 element: Union[DimensionElement, str], 

1073 *data: Union[Mapping[str, Any], DimensionRecord], 

1074 conform: bool = True, 

1075 replace: bool = False, 

1076 ) -> None: 

1077 """Insert one or more dimension records into the database. 

1078 

1079 Parameters 

1080 ---------- 

1081 element : `DimensionElement` or `str` 

1082 The `DimensionElement` or name thereof that identifies the table 

1083 records will be inserted into. 

1084 data : `dict` or `DimensionRecord` (variadic) 

1085 One or more records to insert. 

1086 conform : `bool`, optional 

1087 If `False` (`True` is default) perform no checking or conversions, 

1088 and assume that ``element`` is a `DimensionElement` instance and 

1089 ``data`` is one or more `DimensionRecord` instances of the 

1090 appropriate subclass. 

1091 replace : `bool`, optional 

1092 If `True` (`False` is default), replace existing records in the 

1093 database if there is a conflict. 

1094 """ 

1095 raise NotImplementedError() 
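
    # Usage sketch (illustrative): insert an ``instrument`` record as a plain
    # dict.  The values are hypothetical and the record fields shown are
    # assumed to match the default dimension configuration.
    #
    #     registry.insertDimensionData(
    #         "instrument",
    #         {
    #             "name": "HSC",
    #             "visit_max": 21474800,
    #             "exposure_max": 21474800,
    #             "detector_max": 200,
    #             "class_name": "lsst.obs.subaru.HyperSuprimeCam",
    #         },
    #     )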

1096 

1097 @abstractmethod 

1098 def syncDimensionData( 

1099 self, 

1100 element: Union[DimensionElement, str], 

1101 row: Union[Mapping[str, Any], DimensionRecord], 

1102 conform: bool = True, 

1103 update: bool = False, 

1104 ) -> Union[bool, Dict[str, Any]]: 

1105 """Synchronize the given dimension record with the database, inserting 

1106 if it does not already exist and comparing values if it does. 

1107 

1108 Parameters 

1109 ---------- 

1110 element : `DimensionElement` or `str` 

1111 The `DimensionElement` or name thereof that identifies the table 

1112 records will be inserted into. 

1113 row : `dict` or `DimensionRecord` 

1114 The record to insert. 

1115 conform : `bool`, optional 

1116 If `False` (`True` is default) perform no checking or conversions, 

1117 and assume that ``element`` is a `DimensionElement` instance and 

1118 ``row`` is a `DimensionRecord` instance of the 

1119 appropriate subclass. 

1120 update : `bool`, optional 

1121 If `True` (`False` is default), update the existing record in the 

1122 database if there is a conflict. 

1123 

1124 Returns 

1125 ------- 

1126 inserted_or_updated : `bool` or `dict` 

1127 `True` if a new row was inserted, `False` if no changes were 

1128 needed, or a `dict` mapping updated column names to their old 

1129 values if an update was performed (only possible if 

1130 ``update=True``). 

1131 

1132 Raises 

1133 ------ 

1134 ConflictingDefinitionError 

1135 Raised if the record exists in the database (according to primary 

1136 key lookup) but is inconsistent with the given one. 

1137 """ 

1138 raise NotImplementedError() 

1139 

1140 @abstractmethod 

1141 def queryDatasetTypes( 

1142 self, 

1143 expression: Any = ..., 

1144 *, 

1145 components: Optional[bool] = None, 

1146 missing: Optional[List[str]] = None, 

1147 ) -> Iterator[DatasetType]: 

1148 """Iterate over the dataset types whose names match an expression. 

1149 

1150 Parameters 

1151 ---------- 

1152 expression : `Any`, optional 

1153 An expression that fully or partially identifies the dataset types 

1154 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1155 ``...`` can be used to return all dataset types, and is the 

1156 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1157 information. 

1158 components : `bool`, optional 

1159 If `True`, apply all expression patterns to component dataset type 

1160 names as well. If `False`, never apply patterns to components. 

1161 If `None` (default), apply patterns to components only if their 

1162 parent datasets were not matched by the expression. 

1163 Fully-specified component datasets (`str` or `DatasetType` 

1164 instances) are always included. 

1165 missing : `list` of `str`, optional 

1166 String dataset type names that were explicitly given (i.e. not 

1167 regular expression patterns) but not found will be appended to this 

1168 list, if it is provided. 

1169 

1170 Yields 

1171 ------ 

1172 datasetType : `DatasetType` 

1173 A `DatasetType` instance whose name matches ``expression``. 

1174 """ 

1175 raise NotImplementedError() 

1176 

1177 @abstractmethod 

1178 def queryCollections( 

1179 self, 

1180 expression: Any = ..., 

1181 datasetType: Optional[DatasetType] = None, 

1182 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

1183 flattenChains: bool = False, 

1184 includeChains: Optional[bool] = None, 

1185 ) -> Iterator[str]: 

1186 """Iterate over the collections whose names match an expression. 

1187 

1188 Parameters 

1189 ---------- 

1190 expression : `Any`, optional 

1191 An expression that identifies the collections to return, such as 

1192 a `str` (for full matches or partial matches via globs), 

1193 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1194 can be used to return all collections, and is the default. 

1195 See :ref:`daf_butler_collection_expressions` for more information. 

1196 datasetType : `DatasetType`, optional 

1197 If provided, only yield collections that may contain datasets of 

1198 this type. This is a conservative approximation in general; it may 

1199 yield collections that do not have any such datasets. 

1200 collectionTypes : `AbstractSet` [ `CollectionType` ] or \ 

1201 `CollectionType`, optional 

1202 If provided, only yield collections of these types. 

1203 flattenChains : `bool`, optional 

1204 If `True` (`False` is default), recursively yield the child 

1205 collections of matching `~CollectionType.CHAINED` collections. 

1206 includeChains : `bool`, optional 

1207 If `True`, yield records for matching `~CollectionType.CHAINED` 

1208 collections. Default is the opposite of ``flattenChains``: include 

1209 either CHAINED collections or their children, but not both. 

1210 

1211 Yields 

1212 ------ 

1213 collection : `str` 

1214 The name of a collection that matches ``expression``. 

1215 """ 

1216 raise NotImplementedError() 

1217 

1218 @abstractmethod 

1219 def queryDatasets( 

1220 self, 

1221 datasetType: Any, 

1222 *, 

1223 collections: Any = None, 

1224 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1225 dataId: Optional[DataId] = None, 

1226 where: Optional[str] = None, 

1227 findFirst: bool = False, 

1228 components: Optional[bool] = None, 

1229 bind: Optional[Mapping[str, Any]] = None, 

1230 check: bool = True, 

1231 **kwargs: Any, 

1232 ) -> Iterable[DatasetRef]: 

1233 """Query for and iterate over dataset references matching user-provided 

1234 criteria. 

1235 

1236 Parameters 

1237 ---------- 

1238 datasetType 

1239 An expression that fully or partially identifies the dataset types 

1240 to be queried. Allowed types include `DatasetType`, `str`, 

1241 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1242 be used to query all dataset types. See 

1243 :ref:`daf_butler_dataset_type_expressions` for more information. 

1244 collections : `Any`, optional 

1245 An expression that identifies the collections to search, such as a 

1246 `str` (for full matches or partial matches via globs), `re.Pattern` 

1247 (for partial matches), or iterable thereof. ``...`` can be used to 

1248 search all collections (actually just all `~CollectionType.RUN` 

1249 collections, because this will still find all datasets). 

1250 If not provided, ``self.default.collections`` is used. See 

1251 :ref:`daf_butler_collection_expressions` for more information. 

1252 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1253 Dimensions to include in the query (in addition to those used 

1254 to identify the queried dataset type(s)), either to constrain 

1255 the resulting datasets to those for which a matching dimension 

1256 exists, or to relate the dataset type's dimensions to dimensions 

1257 referenced by the ``dataId`` or ``where`` arguments. 

1258 dataId : `dict` or `DataCoordinate`, optional 

1259 A data ID whose key-value pairs are used as equality constraints 

1260 in the query. 

1261 where : `str`, optional 

1262 A string expression similar to a SQL WHERE clause. May involve 

1263 any column of a dimension table or (as a shortcut for the primary 

1264 key column of a dimension table) dimension name. See 

1265 :ref:`daf_butler_dimension_expressions` for more information. 

1266 findFirst : `bool`, optional 

1267 If `True` (`False` is default), for each result data ID, only 

1268 yield one `DatasetRef` of each `DatasetType`, from the first 

1269 collection in which a dataset of that dataset type appears 

1270 (according to the order of ``collections`` passed in). If `True`, 

1271 ``collections`` must not contain regular expressions and may not 

1272 be ``...``. 

1273 components : `bool`, optional 

1274 If `True`, apply all dataset expression patterns to component 

1275 dataset type names as well. If `False`, never apply patterns to 

1276 components. If `None` (default), apply patterns to components only 

1277 if their parent datasets were not matched by the expression. 

1278 Fully-specified component datasets (`str` or `DatasetType` 

1279 instances) are always included. 

1280 bind : `Mapping`, optional 

1281 Mapping containing literal values that should be injected into the 

1282 ``where`` expression, keyed by the identifiers they replace. 

1283 check : `bool`, optional 

1284 If `True` (default) check the query for consistency before 

1285 executing it. This may reject some valid queries that resemble 

1286 common mistakes (e.g. queries for visits without specifying an 

1287 instrument). 

1288 **kwargs 

1289 Additional keyword arguments are forwarded to 

1290 `DataCoordinate.standardize` when processing the ``dataId`` 

1291 argument (and may be used to provide a constraining data ID even 

1292 when the ``dataId`` argument is `None`). 

1293 

1294 Returns 

1295 ------- 

1296 refs : `queries.DatasetQueryResults` 

1297 Dataset references matching the given query criteria. Nested data 

1298 IDs are guaranteed to include values for all implied dimensions 

1299 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1300 include dimension records (`DataCoordinate.hasRecords` will be 

1301 `False`) unless `~queries.DatasetQueryResults.expanded` is called 

1302 on the result object (which returns a new one). 

1303 

1304 Raises 

1305 ------ 

1306 TypeError 

1307 Raised when the arguments are incompatible, such as when a 

1308 collection wildcard is passed when ``findFirst`` is `True`, or 

1309 when ``collections`` is `None` and ``self.defaults.collections`` is 

1310 also `None`. 

1311 

1312 Notes 

1313 ----- 

1314 When multiple dataset types are queried in a single call, the 

1315 results of this operation are equivalent to querying for each dataset 

1316 type separately in turn, and no information about the relationships 

1317 between datasets of different types is included. In contexts where 

1318 that kind of information is important, the recommended pattern is to 

1319 use `queryDataIds` to first obtain data IDs (possibly with the 

1320 desired dataset types and collections passed as constraints to the 

1321 query), and then use multiple (generally much simpler) calls to 

1322 `queryDatasets` with the returned data IDs passed as constraints. 

1323 """ 

1324 raise NotImplementedError() 
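
    # Usage sketch (illustrative): a find-first query for one visit.  The
    # dataset type, collection, and ``where`` values are hypothetical.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["HSC/runs/example"],
    #         where="instrument = 'HSC' AND visit = 903334",
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId["detector"], ref.run)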

1325 

1326 @abstractmethod 

1327 def queryDataIds( 

1328 self, 

1329 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1330 *, 

1331 dataId: Optional[DataId] = None, 

1332 datasets: Any = None, 

1333 collections: Any = None, 

1334 where: Optional[str] = None, 

1335 components: Optional[bool] = None, 

1336 bind: Optional[Mapping[str, Any]] = None, 

1337 check: bool = True, 

1338 **kwargs: Any, 

1339 ) -> DataCoordinateIterable: 

1340 """Query for data IDs matching user-provided criteria. 

1341 

1342 Parameters 

1343 ---------- 

1344 dimensions : `Dimension` or `str`, or iterable thereof 

1345 The dimensions of the data IDs to yield, as either `Dimension` 

1346 instances or `str`. Will be automatically expanded to a complete 

1347 `DimensionGraph`. 

1348 dataId : `dict` or `DataCoordinate`, optional 

1349 A data ID whose key-value pairs are used as equality constraints 

1350 in the query. 

1351 datasets : `Any`, optional 

1352 An expression that fully or partially identifies dataset types 

1353 that should constrain the yielded data IDs. For example, including 

1354 "raw" here would constrain the yielded ``instrument``, 

1355 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1356 those for which at least one "raw" dataset exists in 

1357 ``collections``. Allowed types include `DatasetType`, `str`, 

1358 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1359 expressions, ``...`` is not permitted - it doesn't make sense to 

1360 constrain data IDs on the existence of *all* datasets. 

1361 See :ref:`daf_butler_dataset_type_expressions` for more 

1362 information. 

1363 collections : `Any`, optional 

1364 An expression that identifies the collections to search for 

1365 datasets, such as a `str` (for full matches or partial matches 

1366 via globs), `re.Pattern` (for partial matches), or iterable 

1367 thereof. ``...`` can be used to search all collections (actually 

1368 just all `~CollectionType.RUN` collections, because this will 

1369 still find all datasets). If not provided, 

1370 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1371 is also passed. See :ref:`daf_butler_collection_expressions` for 

1372 more information. 

1373 where : `str`, optional 

1374 A string expression similar to a SQL WHERE clause. May involve 

1375 any column of a dimension table or (as a shortcut for the primary 

1376 key column of a dimension table) dimension name. See 

1377 :ref:`daf_butler_dimension_expressions` for more information. 

1378 components : `bool`, optional 

1379 If `True`, apply all dataset expression patterns to component 

1380 dataset type names as well. If `False`, never apply patterns to 

1381 components. If `None` (default), apply patterns to components only 

1382 if their parent datasets were not matched by the expression. 

1383 Fully-specified component datasets (`str` or `DatasetType` 

1384 instances) are always included. 

1385 bind : `Mapping`, optional 

1386 Mapping containing literal values that should be injected into the 

1387 ``where`` expression, keyed by the identifiers they replace. 

1388 check : `bool`, optional 

1389 If `True` (default) check the query for consistency before 

1390 executing it. This may reject some valid queries that resemble 

1391 common mistakes (e.g. queries for visits without specifying an 

1392 instrument). 

1393 **kwargs 

1394 Additional keyword arguments are forwarded to 

1395 `DataCoordinate.standardize` when processing the ``dataId`` 

1396 argument (and may be used to provide a constraining data ID even 

1397 when the ``dataId`` argument is `None`). 

1398 

1399 Returns 

1400 ------- 

1401 dataIds : `DataCoordinateQueryResults` 

1402 Data IDs matching the given query parameters. These are guaranteed 

1403 to identify all dimensions (`DataCoordinate.hasFull` returns 

1404 `True`), but will not contain `DimensionRecord` objects 

1405 (`DataCoordinate.hasRecords` returns `False`). Call 

1406 `DataCoordinateQueryResults.expanded` on the returned object to 

1407 fetch those (and consider using 

1408 `DataCoordinateQueryResults.materialize` on the returned object 

1409 first if the expected number of rows is very large). See 

1410 documentation for those methods for additional information. 

1411 

1412 Raises 

1413 ------ 

1414 TypeError 

1415 Raised if ``collections`` is `None`, ``self.defaults.collections`` 

1416 is `None`, and ``datasets`` is not `None`. 

1417 """ 

1418 raise NotImplementedError() 
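
    # Usage sketch (illustrative): data IDs constrained by the existence of
    # "raw" datasets in a (hypothetical) collection.
    #
    #     data_ids = registry.queryDataIds(
    #         ["exposure", "detector"],
    #         datasets="raw",
    #         collections="HSC/raw/all",
    #         where="instrument = 'HSC' AND exposure.observation_type = 'science'",
    #     )
    #     for data_id in data_ids.expanded():
    #         print(data_id)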

1419 

1420 @abstractmethod 

1421 def queryDimensionRecords( 

1422 self, 

1423 element: Union[DimensionElement, str], 

1424 *, 

1425 dataId: Optional[DataId] = None, 

1426 datasets: Any = None, 

1427 collections: Any = None, 

1428 where: Optional[str] = None, 

1429 components: Optional[bool] = None, 

1430 bind: Optional[Mapping[str, Any]] = None, 

1431 check: bool = True, 

1432 **kwargs: Any, 

1433 ) -> Iterable[DimensionRecord]: 

1434 """Query for dimension information matching user-provided criteria. 

1435 

1436 Parameters 

1437 ---------- 

1438 element : `DimensionElement` or `str` 

1439 The dimension element to obtain records for. 

1440 dataId : `dict` or `DataCoordinate`, optional 

1441 A data ID whose key-value pairs are used as equality constraints 

1442 in the query. 

1443 datasets : `Any`, optional 

1444 An expression that fully or partially identifies dataset types 

1445 that should constrain the yielded records. See `queryDataIds` and 

1446 :ref:`daf_butler_dataset_type_expressions` for more information. 

1447 collections : `Any`, optional 

1448 An expression that identifies the collections to search for 

1449 datasets, such as a `str` (for full matches or partial matches 

1450 via globs), `re.Pattern` (for partial matches), or iterable 

1451 thereof. ``...`` can be used to search all collections (actually 

1452 just all `~CollectionType.RUN` collections, because this will 

1453 still find all datasets). If not provided, 

1454 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1455 is also passed. See :ref:`daf_butler_collection_expressions` for 

1456 more information. 

1457 where : `str`, optional 

1458 A string expression similar to a SQL WHERE clause. See 

1459 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1460 information. 

1461 components : `bool`, optional 

1462 Whether to apply dataset expressions to components as well. 

1463 See `queryDataIds` for more information. 

1464 bind : `Mapping`, optional 

1465 Mapping containing literal values that should be injected into the 

1466 ``where`` expression, keyed by the identifiers they replace. 

1467 check : `bool`, optional 

1468 If `True` (default) check the query for consistency before 

1469 executing it. This may reject some valid queries that resemble 

1470 common mistakes (e.g. queries for visits without specifying an 

1471 instrument). 

1472 **kwargs 

1473 Additional keyword arguments are forwarded to 

1474 `DataCoordinate.standardize` when processing the ``dataId`` 

1475 argument (and may be used to provide a constraining data ID even 

1476 when the ``dataId`` argument is `None`). 

1477 

1478 Returns 

1479 ------- 

1480 records : `Iterable` [ `DimensionRecord` ] 

1481 Dimension records matching the given query parameters. 

1482 """ 

1483 raise NotImplementedError() 
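
    # Usage sketch (illustrative; the instrument name and the ``detector``
    # record fields used below are assumed from the default dimension
    # configuration):
    #
    #     for record in registry.queryDimensionRecords(
    #         "detector",
    #         where="instrument = 'HSC' AND detector.purpose = 'SCIENCE'",
    #     ):
    #         print(record.id, record.full_name)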

1484 

1485 @abstractmethod 

1486 def queryDatasetAssociations( 

1487 self, 

1488 datasetType: Union[str, DatasetType], 

1489 collections: Any = ..., 

1490 *, 

1491 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1492 flattenChains: bool = False, 

1493 ) -> Iterator[DatasetAssociation]: 

1494 """Iterate over dataset-collection combinations where the dataset is in 

1495 the collection. 

1496 

1497 This method is a temporary placeholder for better support for 

1498 association results in `queryDatasets`. It will probably be 

1499 removed in the future, and should be avoided in production code 

1500 whenever possible. 

1501 

1502 Parameters 

1503 ---------- 

1504 datasetType : `DatasetType` or `str` 

1505 A dataset type object or the name of one. 

1506 collections : `Any`, optional 

1507 An expression that identifies the collections to search for 

1508 datasets, such as a `str` (for full matches or partial matches 

1509 via globs), `re.Pattern` (for partial matches), or iterable 

1510 thereof. ``...`` can be used to search all collections (actually 

1511 just all `~CollectionType.RUN` collections, because this will still 

1512 find all datasets). If not provided, ``self.default.collections`` 

1513 is used. See :ref:`daf_butler_collection_expressions` for more 

1514 information. 

1515 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1516 If provided, only yield associations from collections of these 

1517 types. 

1518 flattenChains : `bool`, optional 

1519 If `True` (`False` is default), search in the children of 

1520 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED`` 

1521 collections are ignored. 

1522 

1523 Yields 

1524 ------ 

1525 association : `DatasetAssociation` 

1526 Object representing the relationship between a single dataset and 

1527 a single collection. 

1528 

1529 Raises 

1530 ------ 

1531 TypeError 

1532 Raised if ``collections`` is `None` and 

1533 ``self.defaults.collections`` is `None`. 

1534 """ 

1535 raise NotImplementedError() 

1536 

1537 storageClasses: StorageClassFactory 

1538 """All storage classes known to the registry (`StorageClassFactory`). 

1539 """