Coverage for python/lsst/daf/butler/registry/_registry.py: 61%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataCoordinateIterable,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdGenEnum
from .summaries import CollectionSummary
from .wildcards import CollectionSearch

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed to be shared between
    implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults
    are specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it cannot create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
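
        Examples
        --------
        A minimal sketch of typical use; the repository path is
        hypothetical::

            registry = Registry.fromConfig("/path/to/repo/butler.yaml", writeable=False)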

        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction.
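
        Examples
        --------
        A minimal sketch; if the block raises, changes made inside it are
        rolled back (the ``record`` inserted here is hypothetical)::

            with registry.transaction():
                registry.insertDimensionData("detector", record)
        """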

        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for this registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
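
        Examples
        --------
        A minimal sketch; the collection name is hypothetical::

            from lsst.daf.butler import CollectionType

            created = registry.registerCollection("u/alice/tagged", CollectionType.TAGGED)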

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more
            information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
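
        Examples
        --------
        A minimal sketch; the collection names are hypothetical::

            from lsst.daf.butler import CollectionType

            registry.registerCollection("u/alice/chain", CollectionType.CHAINED)
            registry.setCollectionChain("u/alice/chain", ["u/alice/run1", "u/alice/run2"])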

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
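
        Examples
        --------
        A minimal sketch, assuming the default dimension universe; the
        dataset type name and storage class are hypothetical::

            from lsst.daf.butler import DatasetType

            datasetType = DatasetType(
                "calexp",
                ("instrument", "visit", "detector"),
                "ExposureF",
                universe=registry.dimensions,
            )
            registry.registerDatasetType(datasetType)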

        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other active butler processes that
            have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested dataset type could not be found in the
            registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        KeyError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the
        behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and
        that adding additional collections that do not contain a match to
        the search path never changes the behavior.
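
        Examples
        --------
        A minimal sketch; the instrument, data ID values, and collection
        name are hypothetical::

            ref = registry.findDataset(
                "calexp",
                instrument="HSC",
                visit=903334,
                detector=16,
                collections=["HSC/runs/RC2"],
            )
            if ref is None:
                print("no matching dataset")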

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique
            IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        TypeError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as
            one of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
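
        Examples
        --------
        A minimal sketch, assuming a registered ``calexp`` dataset type; the
        run name and data ID values are hypothetical::

            registry.registerRun("u/alice/run")
            (ref,) = registry.insertDatasets(
                "calexp",
                [{"instrument": "HSC", "visit": 903334, "detector": 16}],
                run="u/alice/run",
            )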

        """
        raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        This method differs from `insertDatasets` in that it accepts
        `DatasetRef` instances which should already be resolved and have a
        dataset ID. If the registry supports globally-unique dataset IDs
        (e.g. `uuid.UUID`) then datasets which already exist in the registry
        will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs when IDs are not
            provided or their type does not match the backend type. By
            default unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an
            exception will be raised if imported IDs clash with existing
            ones. This option has no effect on the use of globally-unique
            IDs which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        TypeError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as
            one of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not
        use this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists
            in the given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        TypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
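
        Examples
        --------
        A minimal sketch using an unbounded validity range; the collection
        name is hypothetical and ``refs`` stands in for previously obtained
        dataset references::

            from lsst.daf.butler import CollectionType, Timespan

            registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
            registry.certify("HSC/calib", refs, Timespan(begin=None, end=None))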

        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the
            collection. Datasets that overlap this range but are not
            contained by it will have their validity ranges adjusted to not
            overlap it, which may split a single dataset validity range into
            two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        TypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.
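
        Examples
        --------
        A minimal sketch; the instrument and detector values are
        hypothetical::

            expanded = registry.expandDataId(instrument="HSC", detector=16)
            assert expanded.hasRecords() and expanded.hasFull()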

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` is one or more `DimensionRecord` instances
            of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
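
        Examples
        --------
        A minimal sketch; the instrument name and values are hypothetical,
        and the field names assume the default dimension universe::

            registry.insertDimensionData(
                "instrument",
                {
                    "name": "DummyCam",
                    "visit_max": 999999,
                    "exposure_max": 999999,
                    "detector_max": 99,
                    "class_name": "lsst.obs.dummy.DummyCam",
                },
            )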

        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to
            primary key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.
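
        Examples
        --------
        A minimal sketch; the pattern is hypothetical::

            import re

            for datasetType in registry.queryDatasetTypes(re.compile(r"^calexp")):
                print(datasetType.name)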

        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.
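
        Examples
        --------
        A minimal sketch; the glob is hypothetical::

            from lsst.daf.butler import CollectionType

            for name in registry.queryCollections("HSC/*", collectionTypes=CollectionType.RUN):
                print(name)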

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> Iterable[DatasetRef]:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.defaults.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each
        dataset type separately in turn, and no information about the
        relationships between datasets of different types is included. In
        contexts where that kind of information is important, the
        recommended pattern is to use `queryDataIds` to first obtain data
        IDs (possibly with the desired dataset types and collections passed
        as constraints to the query), and then use multiple (generally much
        simpler) calls to `queryDatasets` with the returned data IDs passed
        as constraints.
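
        Examples
        --------
        A minimal sketch; the dataset type, collection, and constraint
        values are hypothetical::

            refs = registry.queryDatasets(
                "calexp",
                collections=["HSC/runs/RC2"],
                where="detector = 16 AND visit > 900000",
                instrument="HSC",
            )
            for ref in refs:
                print(ref.dataId)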

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateIterable:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, ``...`` is not permitted - it doesn't make sense to
            constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions
            (`DataCoordinate.hasFull` returns `True`), but will not contain
            `DimensionRecord` objects (`DataCoordinate.hasRecords` returns
            `False`). Call `DataCoordinateQueryResults.expanded` on the
            returned object to fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None`,
            ``self.defaults.collections`` is `None`, and ``datasets`` is not
            `None`.
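
        Examples
        --------
        A minimal sketch, constraining visit/detector data IDs to those with
        at least one ``raw`` dataset; all names are hypothetical::

            dataIds = registry.queryDataIds(
                ["visit", "detector"],
                datasets="raw",
                collections="HSC/raw/all",
                instrument="HSC",
            )
            for dataId in dataIds:
                print(dataId["visit"], dataId["detector"])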

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> Iterable[DimensionRecord]:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds`
            and :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `Iterator` [ `DimensionRecord` ]
            Dimension records matching the given query parameters.
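
        Examples
        --------
        A minimal sketch; the instrument name is hypothetical::

            for record in registry.queryDimensionRecords("detector", instrument="HSC"):
                print(record.id)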

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `DatasetAssociation`
            Object representing the relationship between a single dataset
            and a single collection.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """