Coverage for python/lsst/daf/butler/registry/_registry.py: 61%


177 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from abc import ABC, abstractmethod 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Tuple, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45from lsst.utils import doImportType 

46 

47from ..core import ( 

48 ButlerURI, 

49 Config, 

50 DataCoordinate, 

51 DataCoordinateIterable, 

52 DataId, 

53 DatasetAssociation, 

54 DatasetId, 

55 DatasetRef, 

56 DatasetType, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionRecord, 

62 DimensionUniverse, 

63 NameLookupMapping, 

64 StorageClassFactory, 

65 Timespan, 

66) 

67 

68from ._config import RegistryConfig 

69from ._collectionType import CollectionType 

70from ._defaults import RegistryDefaults 

71from .interfaces import DatasetIdGenEnum 

72from .wildcards import CollectionSearch 

73from .summaries import CollectionSummary 

74 

75if TYPE_CHECKING:    75 ↛ 76: line 75 didn't jump to line 76, because the condition on line 75 was never true

76 from .._butlerConfig import ButlerConfig 

77 from .interfaces import ( 

78 CollectionRecord, 

79 DatastoreRegistryBridgeManager, 

80 ) 

81 

82_LOG = logging.getLogger(__name__) 

83 

84 

85class Registry(ABC): 

86 """Abstract Registry interface. 

87 

88 Each registry implementation can have its own constructor parameters. 

89 The assumption is that an instance of a specific subclass will be 

90 constructed from configuration using `Registry.fromConfig()`. 

91 The base class will look for a ``cls`` entry and call that specific 

92 `fromConfig()` method. 

93 

94 All subclasses should store `RegistryDefaults` in a ``_defaults`` 

95 property. No other properties are assumed shared between implementations. 

96 """ 

97 

98 defaultConfigFile: Optional[str] = None 

99 """Path to configuration defaults. Accessed within the ``configs`` resource 

100 or relative to a search path. Can be `None` if no defaults are specified. 

101 """ 

102 

103 @classmethod 

104 def forceRegistryConfig(cls, config: Optional[Union[ButlerConfig, 

105 RegistryConfig, Config, str]]) -> RegistryConfig: 

106 """Force the supplied config to a `RegistryConfig`. 

107 

108 Parameters 

109 ---------- 

110 config : `RegistryConfig`, `Config`, `str`, or `None` 

111 Registry configuration. If missing, default configuration will 

112 be loaded from registry.yaml. 

113 

114 Returns 

115 ------- 

116 registry_config : `RegistryConfig` 

117 A registry config. 

118 """ 

119 if not isinstance(config, RegistryConfig): 

120 if isinstance(config, (str, Config)) or config is None: 

121 config = RegistryConfig(config) 

122 else: 

123 raise ValueError(f"Incompatible Registry configuration: {config}") 

124 return config 

125 

126 @classmethod 

127 def determineTrampoline(cls, 

128 config: Optional[Union[ButlerConfig, 

129 RegistryConfig, 

130 Config, 

131 str]]) -> Tuple[Type[Registry], RegistryConfig]: 

132 """Return class to use to instantiate real registry. 

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration. If missing, default configuration will 

138 be loaded from registry.yaml. 

139 

140 Returns 

141 ------- 

142 requested_cls : `type` of `Registry` 

143 The real registry class to use. 

144 registry_config : `RegistryConfig` 

145 The `RegistryConfig` to use. 

146 """ 

147 config = cls.forceRegistryConfig(config) 

148 

149 # Default to the standard registry 

150 registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") 

151 registry_cls = doImportType(registry_cls_name) 

152 if registry_cls is cls: 

153 raise ValueError("Can not instantiate the abstract base Registry from config") 

154 if not issubclass(registry_cls, Registry): 

155 raise TypeError(f"Registry class obtained from config {registry_cls_name} is " 

156 "not a Registry class.") 

157 return registry_cls, config 

158 

159 @classmethod 

160 def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None, 

161 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

162 butlerRoot: Optional[str] = None) -> Registry: 

163 """Create registry database and return `Registry` instance. 

164 

165 This method initializes database contents; the database must be empty 

166 prior to calling this method. 

167 

168 Parameters 

169 ---------- 

170 config : `RegistryConfig` or `str`, optional 

171 Registry configuration. If missing, default configuration will 

172 be loaded from registry.yaml. 

173 dimensionConfig : `DimensionConfig` or `str`, optional 

174 Dimensions configuration, if missing then default configuration 

175 will be loaded from dimensions.yaml. 

176 butlerRoot : `str`, optional 

177 Path to the repository root this `Registry` will manage. 

178 

179 Returns 

180 ------- 

181 registry : `Registry` 

182 A new `Registry` instance. 

183 

184 Notes 

185 ----- 

186 This class will determine the concrete `Registry` subclass to 

187 use from configuration. Each subclass should implement this method 

188 even if it can not create a registry. 

189 """ 

190 registry_cls, registry_config = cls.determineTrampoline(config) 

191 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) 

192 

193 @classmethod 

194 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], 

195 butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True, 

196 defaults: Optional[RegistryDefaults] = None) -> Registry: 

197 """Create `Registry` subclass instance from `config`. 

198 

199 Registry database must be initialized prior to calling this method. 

200 

201 Parameters 

202 ---------- 

203 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

204 Registry configuration. 

205 butlerRoot : `str` or `ButlerURI`, optional 

206 Path to the repository root this `Registry` will manage. 

207 writeable : `bool`, optional 

208 If `True` (default) create a read-write connection to the database. 

209 defaults : `RegistryDefaults`, optional 

210 Default collection search path and/or output `~CollectionType.RUN` 

211 collection. 

212 

213 Returns 

214 ------- 

215 registry : `Registry` (subclass) 

216 A new `Registry` subclass instance. 

217 

218 Notes 

219 ----- 

220 This class will determine the concrete `Registry` subclass to 

221 use from configuration. Each subclass should implement this method. 

222 """ 

223 # The base class implementation should trampoline to the correct 

224 # subclass. No implementation should ever use this implementation 

225 # directly. If no class is specified, default to the standard 

226 # registry. 

227 registry_cls, registry_config = cls.determineTrampoline(config) 

228 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) 
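
A minimal sketch of obtaining a `Registry` from configuration, assuming an already-initialized repository; the configuration path is hypothetical:

    from lsst.daf.butler.registry import Registry

    # The concrete subclass is chosen from the ``cls`` entry in the
    # configuration (``lsst.daf.butler.registries.sql.SqlRegistry`` by default).
    registry = Registry.fromConfig("registry.yaml", writeable=False)
    assert not registry.isWriteable()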

229 

230 @abstractmethod 

231 def isWriteable(self) -> bool: 

232 """Return `True` if this registry allows write operations, and `False` 

233 otherwise. 

234 """ 

235 raise NotImplementedError() 

236 

237 @abstractmethod 

238 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

239 """Create a new `Registry` backed by the same data repository and 

240 connection as this one, but independent defaults. 

241 

242 Parameters 

243 ---------- 

244 defaults : `RegistryDefaults`, optional 

245 Default collections and data ID values for the new registry. If 

246 not provided, ``self.defaults`` will be used (but future changes 

247 to either registry's defaults will not affect the other). 

248 

249 Returns 

250 ------- 

251 copy : `Registry` 

252 A new `Registry` instance with its own defaults. 

253 

254 Notes 

255 ----- 

256 Because the new registry shares a connection with the original, they 

257 also share transaction state (despite the fact that their `transaction` 

258 context manager methods do not reflect this), and must be used with 

259 care. 

260 """ 

261 raise NotImplementedError() 

262 

263 @property 

264 @abstractmethod 

265 def dimensions(self) -> DimensionUniverse: 

266 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

267 """ 

268 raise NotImplementedError() 

269 

270 @property 

271 def defaults(self) -> RegistryDefaults: 

272 """Default collection search path and/or output `~CollectionType.RUN` 

273 collection (`RegistryDefaults`). 

274 

275 This is an immutable struct whose components may not be set 

276 individually, but the entire struct can be set by assigning to this 

277 property. 

278 """ 

279 return self._defaults 

280 

281 @defaults.setter 

282 def defaults(self, value: RegistryDefaults) -> None: 

283 if value.run is not None: 

284 self.registerRun(value.run) 

285 value.finish(self) 

286 self._defaults = value 

287 

288 @abstractmethod 

289 def refresh(self) -> None: 

290 """Refresh all in-memory state by querying the database. 

291 

292 This may be necessary to enable querying for entities added by other 

293 registry instances after this one was constructed. 

294 """ 

295 raise NotImplementedError() 

296 

297 @contextlib.contextmanager 

298 @abstractmethod 

299 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

300 """Return a context manager that represents a transaction. 

301 """ 

302 raise NotImplementedError() 
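
A sketch of grouping Registry operations in a single transaction, using the ``registry`` instance from the earlier sketch and assuming the TAGGED collections and the "calexp" dataset type already exist (all names are hypothetical):

    refs = list(registry.queryDatasets("calexp", collections=["u/example/run"]))
    with registry.transaction(savepoint=True):
        # Both operations are committed together; an exception raised inside
        # the block rolls the database back to its state before the block.
        registry.associate("u/example/good", refs)
        registry.disassociate("u/example/candidates", refs)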

303 

304 def resetConnectionPool(self) -> None: 

305 """Reset connection pool for registry if relevant. 

306 

307 This operation can be used to reset connections to servers when 

308 using registry with fork-based multiprocessing. This method should 

309 usually be called by the child process immediately 

310 after the fork. 

311 

312 The base class implementation is a no-op. 

313 """ 

314 pass 

315 

316 @abstractmethod 

317 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED, 

318 doc: Optional[str] = None) -> bool: 

319 """Add a new collection if one with the given name does not exist. 

320 

321 Parameters 

322 ---------- 

323 name : `str` 

324 The name of the collection to create. 

325 type : `CollectionType` 

326 Enum value indicating the type of collection to create. 

327 doc : `str`, optional 

328 Documentation string for the collection. 

329 

330 Returns 

331 ------- 

332 registered : `bool` 

333 `True` if the collection was registered by this call, `False` if it 

334 already existed. 

335 

336 Notes 

337 ----- 

338 This method cannot be called within transactions, as it needs to be 

339 able to perform its own transaction to be concurrent. 

340 """ 

341 raise NotImplementedError() 

342 

343 @abstractmethod 

344 def getCollectionType(self, name: str) -> CollectionType: 

345 """Return an enumeration value indicating the type of the given 

346 collection. 

347 

348 Parameters 

349 ---------- 

350 name : `str` 

351 The name of the collection. 

352 

353 Returns 

354 ------- 

355 type : `CollectionType` 

356 Enum value indicating the type of this collection. 

357 

358 Raises 

359 ------ 

360 MissingCollectionError 

361 Raised if no collection with the given name exists. 

362 """ 

363 raise NotImplementedError() 

364 

365 @abstractmethod 

366 def _get_collection_record(self, name: str) -> CollectionRecord: 

367 """Return the record for this collection. 

368 

369 Parameters 

370 ---------- 

371 name : `str` 

372 Name of the collection for which the record is to be retrieved. 

373 

374 Returns 

375 ------- 

376 record : `CollectionRecord` 

377 The record for this collection. 

378 """ 

379 raise NotImplementedError() 

380 

381 @abstractmethod 

382 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

383 """Add a new run if one with the given name does not exist. 

384 

385 Parameters 

386 ---------- 

387 name : `str` 

388 The name of the run to create. 

389 doc : `str`, optional 

390 Documentation string for the collection. 

391 

392 Returns 

393 ------- 

394 registered : `bool` 

395 Boolean indicating whether a new run was registered. `False` 

396 if it already existed. 

397 

398 Notes 

399 ----- 

400 This method cannot be called within transactions, as it needs to be 

401 able to perform its own transaction to be concurrent. 

402 """ 

403 raise NotImplementedError() 

404 

405 @abstractmethod 

406 def removeCollection(self, name: str) -> None: 

407 """Remove the given collection from the registry. 

408 

409 Parameters 

410 ---------- 

411 name : `str` 

412 The name of the collection to remove. 

413 

414 Raises 

415 ------ 

416 MissingCollectionError 

417 Raised if no collection with the given name exists. 

418 sqlalchemy.IntegrityError 

419 Raised if the database rows associated with the collection are 

420 still referenced by some other table, such as a dataset in a 

421 datastore (for `~CollectionType.RUN` collections only) or a 

422 `~CollectionType.CHAINED` collection of which this collection is 

423 a child. 

424 

425 Notes 

426 ----- 

427 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

428 in it will be removed from the `Registry` database. This requires that 

429 those datasets be removed (or at least trashed) from any datastores 

430 that hold them first. 

431 

432 A collection may not be deleted as long as it is referenced by a 

433 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

434 be deleted or redefined first. 

435 """ 

436 raise NotImplementedError() 

437 

438 @abstractmethod 

439 def getCollectionChain(self, parent: str) -> CollectionSearch: 

440 """Return the child collections in a `~CollectionType.CHAINED` 

441 collection. 

442 

443 Parameters 

444 ---------- 

445 parent : `str` 

446 Name of the chained collection. Must have already been added via 

447 a call to `Registry.registerCollection`. 

448 

449 Returns 

450 ------- 

451 children : `CollectionSearch` 

452 An object that defines the search path of the collection. 

453 See :ref:`daf_butler_collection_expressions` for more information. 

454 

455 Raises 

456 ------ 

457 MissingCollectionError 

458 Raised if ``parent`` does not exist in the `Registry`. 

459 TypeError 

460 Raised if ``parent`` does not correspond to a 

461 `~CollectionType.CHAINED` collection. 

462 """ 

463 raise NotImplementedError() 

464 

465 @abstractmethod 

466 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

467 """Define or redefine a `~CollectionType.CHAINED` collection. 

468 

469 Parameters 

470 ---------- 

471 parent : `str` 

472 Name of the chained collection. Must have already been added via 

473 a call to `Registry.registerCollection`. 

474 children : `Any` 

475 An expression defining an ordered search of child collections, 

476 generally an iterable of `str`; see 

477 :ref:`daf_butler_collection_expressions` for more information. 

478 flatten : `bool`, optional 

479 If `True` (`False` is default), recursively flatten out any nested 

480 `~CollectionType.CHAINED` collections in ``children`` first. 

481 

482 Raises 

483 ------ 

484 MissingCollectionError 

485 Raised when any of the given collections do not exist in the 

486 `Registry`. 

487 TypeError 

488 Raised if ``parent`` does not correspond to a 

489 `~CollectionType.CHAINED` collection. 

490 ValueError 

491 Raised if the given collections contain a cycle. 

492 """ 

493 raise NotImplementedError() 
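
A sketch of building a `~CollectionType.CHAINED` collection from a RUN and a TAGGED collection; all collection names are hypothetical:

    from lsst.daf.butler import CollectionType

    registry.registerRun("u/example/processing")
    registry.registerCollection("u/example/extras", CollectionType.TAGGED)
    registry.registerCollection("u/example/everything", CollectionType.CHAINED)
    registry.setCollectionChain(
        "u/example/everything",
        ["u/example/processing", "u/example/extras"],
    )
    # The chain is searched in order; getCollectionChain returns that order.
    children = registry.getCollectionChain("u/example/everything")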

494 

495 @abstractmethod 

496 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

497 """Retrieve the documentation string for a collection. 

498 

499 Parameters 

500 ---------- 

501 collection : `str` 

502 Name of the collection. 

503 

504 Returns 

505 ------- 

506 docs : `str` or `None` 

507 Docstring for the collection with the given name. 

508 """ 

509 raise NotImplementedError() 

510 

511 @abstractmethod 

512 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

513 """Set the documentation string for a collection. 

514 

515 Parameters 

516 ---------- 

517 collection : `str` 

518 Name of the collection. 

519 doc : `str` or `None` 

520 Docstring for the collection with the given name; will replace any 

521 existing docstring. Passing `None` will remove any existing 

522 docstring. 

523 """ 

524 raise NotImplementedError() 

525 

526 @abstractmethod 

527 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

528 """Return a summary for the given collection. 

529 

530 Parameters 

531 ---------- 

532 collection : `str` 

533 Name of the collection for which a summary is to be retrieved. 

534 

535 Returns 

536 ------- 

537 summary : `CollectionSummary` 

538 Summary of the dataset types and governor dimension values in 

539 this collection. 

540 """ 

541 raise NotImplementedError() 

542 

543 @abstractmethod 

544 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

545 """ 

546 Add a new `DatasetType` to the Registry. 

547 

548 It is not an error to register the same `DatasetType` twice. 

549 

550 Parameters 

551 ---------- 

552 datasetType : `DatasetType` 

553 The `DatasetType` to be added. 

554 

555 Returns 

556 ------- 

557 inserted : `bool` 

558 `True` if ``datasetType`` was inserted, `False` if an identical 

559 existing `DatasetType` was found. Note that in either case the 

560 DatasetType is guaranteed to be defined in the Registry 

561 consistently with the given definition. 

562 

563 Raises 

564 ------ 

565 ValueError 

566 Raised if the dimensions or storage class are invalid. 

567 ConflictingDefinitionError 

568 Raised if this DatasetType is already registered with a different 

569 definition. 

570 

571 Notes 

572 ----- 

573 This method cannot be called within transactions, as it needs to be 

574 able to perform its own transaction to be concurrent. 

575 """ 

576 raise NotImplementedError() 
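
A sketch of defining and registering a dataset type; the name, dimensions, and storage class are hypothetical and assume the usual ``DatasetType(name, dimensions, storageClass, universe=...)`` constructor:

    from lsst.daf.butler import DatasetType

    datasetType = DatasetType(
        "example_catalog",
        dimensions=("instrument", "visit", "detector"),
        storageClass="DataFrame",
        universe=registry.dimensions,
    )
    inserted = registry.registerDatasetType(datasetType)
    # ``inserted`` is True on first registration, False if an identical
    # definition already exists.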

577 

578 @abstractmethod 

579 def removeDatasetType(self, name: str) -> None: 

580 """Remove the named `DatasetType` from the registry. 

581 

582 .. warning:: 

583 

584 Registry implementations can cache the dataset type definitions. 

585 This means that deleting the dataset type definition may result in 

586 unexpected behavior from other butler processes that are active 

587 that have not seen the deletion. 

588 

589 Parameters 

590 ---------- 

591 name : `str` 

592 Name of the type to be removed. 

593 

594 Raises 

595 ------ 

596 lsst.daf.butler.registry.OrphanedRecordError 

597 Raised if an attempt is made to remove the dataset type definition 

598 when there are already datasets associated with it. 

599 

600 Notes 

601 ----- 

602 If the dataset type is not registered the method will return without 

603 action. 

604 """ 

605 raise NotImplementedError() 

606 

607 @abstractmethod 

608 def getDatasetType(self, name: str) -> DatasetType: 

609 """Get the `DatasetType`. 

610 

611 Parameters 

612 ---------- 

613 name : `str` 

614 Name of the type. 

615 

616 Returns 

617 ------- 

618 type : `DatasetType` 

619 The `DatasetType` associated with the given name. 

620 

621 Raises 

622 ------ 

623 KeyError 

624 Raised if the requested DatasetType could not be found in the registry. 

625 """ 

626 raise NotImplementedError() 

627 

628 @abstractmethod 

629 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

630 """Test whether the given dataset ID generation mode is supported by 

631 `insertDatasets`. 

632 

633 Parameters 

634 ---------- 

635 mode : `DatasetIdGenEnum` 

636 Enum value for the mode to test. 

637 

638 Returns 

639 ------- 

640 supported : `bool` 

641 Whether the given mode is supported. 

642 """ 

643 raise NotImplementedError() 

644 

645 @abstractmethod 

646 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

647 collections: Any = None, timespan: Optional[Timespan] = None, 

648 **kwargs: Any) -> Optional[DatasetRef]: 

649 """Find a dataset given its `DatasetType` and data ID. 

650 

651 This can be used to obtain a `DatasetRef` that permits the dataset to 

652 be read from a `Datastore`. If the dataset is a component and can not 

653 be found using the provided dataset type, a dataset ref for the parent 

654 will be returned instead but with the correct dataset type. 

655 

656 Parameters 

657 ---------- 

658 datasetType : `DatasetType` or `str` 

659 A `DatasetType` or the name of one. 

660 dataId : `dict` or `DataCoordinate`, optional 

661 A `dict`-like object containing the `Dimension` links that identify 

662 the dataset within a collection. 

663 collections : `Any`, optional 

664 An expression that fully or partially identifies the collections to 

665 search for the dataset; see 

666 :ref:`daf_butler_collection_expressions` for more information. 

667 Defaults to ``self.defaults.collections``. 

668 timespan : `Timespan`, optional 

669 A timespan that the validity range of the dataset must overlap. 

670 If not provided, any `~CollectionType.CALIBRATION` collections 

671 matched by the ``collections`` argument will not be searched. 

672 **kwargs 

673 Additional keyword arguments passed to 

674 `DataCoordinate.standardize` to convert ``dataId`` to a true 

675 `DataCoordinate` or augment an existing one. 

676 

677 Returns 

678 ------- 

679 ref : `DatasetRef` 

680 A reference to the dataset, or `None` if no matching Dataset 

681 was found. 

682 

683 Raises 

684 ------ 

685 TypeError 

686 Raised if ``collections`` is `None` and 

687 ``self.defaults.collections`` is `None`. 

688 LookupError 

689 Raised if one or more data ID keys are missing. 

690 KeyError 

691 Raised if the dataset type does not exist. 

692 MissingCollectionError 

693 Raised if any of ``collections`` does not exist in the registry. 

694 

695 Notes 

696 ----- 

697 This method simply returns `None` and does not raise an exception even 

698 when the set of collections searched is intrinsically incompatible with 

699 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

700 only `~CollectionType.CALIBRATION` collections are being searched. 

701 This may make it harder to debug some lookup failures, but the behavior 

702 is intentional; we consider it more important that failed searches are 

703 reported consistently, regardless of the reason, and that adding 

704 additional collections that do not contain a match to the search path 

705 never changes the behavior. 

706 """ 

707 raise NotImplementedError() 
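
A sketch of looking up a single dataset by data ID; the dataset type, collection, and data ID values are hypothetical:

    ref = registry.findDataset(
        "calexp",
        collections=["u/example/run"],
        instrument="HSC", visit=903334, detector=16,
    )
    if ref is None:
        # Not found in the searched collections (or the collections are
        # intrinsically incompatible with the dataset type; see the Notes).
        ...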

708 

709 @abstractmethod 

710 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

711 run: Optional[str] = None, expand: bool = True, 

712 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE) -> List[DatasetRef]: 

713 """Insert one or more datasets into the `Registry` 

714 

715 This always adds new datasets; to associate existing datasets with 

716 a new collection, use ``associate``. 

717 

718 Parameters 

719 ---------- 

720 datasetType : `DatasetType` or `str` 

721 A `DatasetType` or the name of one. 

722 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

723 Dimension-based identifiers for the new datasets. 

724 run : `str`, optional 

725 The name of the run that produced the datasets. Defaults to 

726 ``self.defaults.run``. 

727 expand : `bool`, optional 

728 If `True` (default), expand data IDs as they are inserted. This is 

729 necessary in general to allow datastore to generate file templates, 

730 but it may be disabled if the caller can guarantee this is 

731 unnecessary. 

732 idGenerationMode : `DatasetIdGenEnum`, optional 

733 Specifies option for generating dataset IDs. By default unique IDs 

734 are generated for each inserted dataset. 

735 

736 Returns 

737 ------- 

738 refs : `list` of `DatasetRef` 

739 Resolved `DatasetRef` instances for all given data IDs (in the same 

740 order). 

741 

742 Raises 

743 ------ 

744 TypeError 

745 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

746 ConflictingDefinitionError 

747 If a dataset with the same dataset type and data ID as one of those 

748 given already exists in ``run``. 

749 MissingCollectionError 

750 Raised if ``run`` does not exist in the registry. 

751 """ 

752 raise NotImplementedError() 
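
A sketch of registering new datasets in a RUN collection (writing the actual files to a `Datastore` happens separately); names and data ID values are hypothetical:

    registry.registerRun("u/example/ingest")
    refs = registry.insertDatasets(
        "example_catalog",
        dataIds=[{"instrument": "HSC", "visit": 903334, "detector": 16}],
        run="u/example/ingest",
    )
    # Each returned DatasetRef is resolved, i.e. it carries a dataset ID.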

753 

754 @abstractmethod 

755 def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True, 

756 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

757 reuseIds: bool = False) -> List[DatasetRef]: 

758 """Import one or more datasets into the `Registry`. 

759 

760 The difference from the `insertDatasets` method is that this method 

761 accepts `DatasetRef` instances which should already be resolved and have 

762 a dataset ID. If the registry supports globally-unique dataset IDs (e.g. 

763 `uuid.UUID`) then datasets which already exist in the registry will be 

764 ignored if imported again. 

765 

766 Parameters 

767 ---------- 

768 datasets : `~collections.abc.Iterable` of `DatasetRef` 

769 Datasets to be inserted. All `DatasetRef` instances must have 

770 identical ``datasetType`` and ``run`` attributes. ``run`` 

771 attribute can be `None` and defaults to ``self.defaults.run``. 

772 Datasets can specify ``id`` attribute which will be used for 

773 inserted datasets. All dataset IDs must have the same type 

774 (`int` or `uuid.UUID`); if the type of the dataset IDs does not match 

775 the configured backend, the IDs will be ignored and new IDs will be 

776 generated by the backend. 

777 expand : `bool`, optional 

778 If `True` (default), expand data IDs as they are inserted. This is 

779 necessary in general to allow datastore to generate file templates, 

780 but it may be disabled if the caller can guarantee this is 

781 unnecessary. 

782 idGenerationMode : `DatasetIdGenEnum`, optional 

783 Specifies option for generating dataset IDs when IDs are not 

784 provided or their type does not match backend type. By default 

785 unique IDs are generated for each inserted dataset. 

786 reuseIds : `bool`, optional 

787 If `True` then forces re-use of imported dataset IDs for integer 

788 IDs which are normally generated as auto-incremented; an exception 

789 will be raised if imported IDs clash with existing ones. This 

790 option has no effect on the use of globally-unique IDs which are 

791 always re-used (or generated if integer IDs are being imported). 

792 

793 Returns 

794 ------- 

795 refs : `list` of `DatasetRef` 

796 Resolved `DatasetRef` instances for all given data IDs (in the same 

797 order). If any of ``datasets`` has an ID which already exists in 

798 the database then it will not be inserted or updated, but a 

799 resolved `DatasetRef` will be returned for it in any case. 

800 

801 Raises 

802 ------ 

803 TypeError 

804 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

805 ConflictingDefinitionError 

806 If a dataset with the same dataset type and data ID as one of those 

807 given already exists in ``run``. 

808 MissingCollectionError 

809 Raised if ``run`` does not exist in the registry. 

810 

811 Notes 

812 ----- 

813 This method is considered package-private and internal to Butler 

814 implementation. Clients outside daf_butler package should not use this 

815 method. 

816 """ 

817 raise NotImplementedError() 

818 

819 @abstractmethod 

820 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

821 """Retrieve a Dataset entry. 

822 

823 Parameters 

824 ---------- 

825 id : `DatasetId` 

826 The unique identifier for the dataset. 

827 

828 Returns 

829 ------- 

830 ref : `DatasetRef` or `None` 

831 A ref to the Dataset, or `None` if no matching Dataset 

832 was found. 

833 """ 

834 raise NotImplementedError() 

835 

836 @abstractmethod 

837 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

838 """Remove datasets from the Registry. 

839 

840 The datasets will be removed unconditionally from all collections, and 

841 any `Quantum` that consumed this dataset will instead be marked as 

842 having a NULL input. `Datastore` records will *not* be deleted; the 

843 caller is responsible for ensuring that the dataset has already been 

844 removed from all Datastores. 

845 

846 Parameters 

847 ---------- 

848 refs : `Iterable` of `DatasetRef` 

849 References to the datasets to be removed. Must include a valid 

850 ``id`` attribute, and should be considered invalidated upon return. 

851 

852 Raises 

853 ------ 

854 AmbiguousDatasetError 

855 Raised if any ``ref.id`` is `None`. 

856 OrphanedRecordError 

857 Raised if any dataset is still present in any `Datastore`. 

858 """ 

859 raise NotImplementedError() 

860 

861 @abstractmethod 

862 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

863 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

864 

865 If a DatasetRef with the same exact ID is already in a collection 

866 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

867 data ID but with different ID exists in the collection, 

868 `ConflictingDefinitionError` is raised. 

869 

870 Parameters 

871 ---------- 

872 collection : `str` 

873 Indicates the collection the datasets should be associated with. 

874 refs : `Iterable` [ `DatasetRef` ] 

875 An iterable of resolved `DatasetRef` instances that already exist 

876 in this `Registry`. 

877 

878 Raises 

879 ------ 

880 ConflictingDefinitionError 

881 If a Dataset with the given `DatasetRef` already exists in the 

882 given collection. 

883 AmbiguousDatasetError 

884 Raised if ``any(ref.id is None for ref in refs)``. 

885 MissingCollectionError 

886 Raised if ``collection`` does not exist in the registry. 

887 TypeError 

888 Raised if adding new datasets to the given ``collection`` is not 

889 allowed. 

890 """ 

891 raise NotImplementedError() 

892 

893 @abstractmethod 

894 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

895 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

896 

897 ``collection`` and ``ref`` combinations that are not currently 

898 associated are silently ignored. 

899 

900 Parameters 

901 ---------- 

902 collection : `str` 

903 The collection the datasets should no longer be associated with. 

904 refs : `Iterable` [ `DatasetRef` ] 

905 An iterable of resolved `DatasetRef` instances that already exist 

906 in this `Registry`. 

907 

908 Raises 

909 ------ 

910 AmbiguousDatasetError 

911 Raised if any of the given dataset references is unresolved. 

912 MissingCollectionError 

913 Raised if ``collection`` does not exist in the registry. 

914 TypeError 

915 Raised if removing datasets from the given ``collection`` is not 

916 allowed. 

917 """ 

918 raise NotImplementedError() 

919 

920 @abstractmethod 

921 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

922 """Associate one or more datasets with a calibration collection and a 

923 validity range within it. 

924 

925 Parameters 

926 ---------- 

927 collection : `str` 

928 The name of an already-registered `~CollectionType.CALIBRATION` 

929 collection. 

930 refs : `Iterable` [ `DatasetRef` ] 

931 Datasets to be associated. 

932 timespan : `Timespan` 

933 The validity range for these datasets within the collection. 

934 

935 Raises 

936 ------ 

937 AmbiguousDatasetError 

938 Raised if any of the given `DatasetRef` instances is unresolved. 

939 ConflictingDefinitionError 

940 Raised if the collection already contains a different dataset with 

941 the same `DatasetType` and data ID and an overlapping validity 

942 range. 

943 TypeError 

944 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

945 collection or if one or more datasets are of a dataset type for 

946 which `DatasetType.isCalibration` returns `False`. 

947 """ 

948 raise NotImplementedError() 

949 

950 @abstractmethod 

951 def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *, 

952 dataIds: Optional[Iterable[DataId]] = None) -> None: 

953 """Remove or adjust datasets to clear a validity range within a 

954 calibration collection. 

955 

956 Parameters 

957 ---------- 

958 collection : `str` 

959 The name of an already-registered `~CollectionType.CALIBRATION` 

960 collection. 

961 datasetType : `str` or `DatasetType` 

962 Name or `DatasetType` instance for the datasets to be decertified. 

963 timespan : `Timespan`, optional 

964 The validity range to remove datasets from within the collection. 

965 Datasets that overlap this range but are not contained by it will 

966 have their validity ranges adjusted to not overlap it, which may 

967 split a single dataset validity range into two. 

968 dataIds : `Iterable` [ `DataId` ], optional 

969 Data IDs that should be decertified within the given validity range. 

970 If `None`, all data IDs for ``datasetType`` will be 

971 decertified. 

972 

973 Raises 

974 ------ 

975 TypeError 

976 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

977 collection or if ``datasetType.isCalibration() is False``. 

978 """ 

979 raise NotImplementedError() 
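
A sketch of maintaining a CALIBRATION collection: certify datasets over an unbounded validity range, then clear that range again. Collection and dataset names are hypothetical, and ``Timespan(begin=None, end=None)`` is assumed here to express an unbounded range:

    from lsst.daf.butler import CollectionType, Timespan

    registry.registerCollection("u/example/calib", CollectionType.CALIBRATION)
    bias_refs = list(registry.queryDatasets("bias", collections=["u/example/ingest"]))
    unbounded = Timespan(begin=None, end=None)
    registry.certify("u/example/calib", bias_refs, unbounded)
    # Remove all bias datasets from the collection's validity range again.
    registry.decertify("u/example/calib", "bias", unbounded)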

980 

981 @abstractmethod 

982 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

983 """Return an object that allows a new `Datastore` instance to 

984 communicate with this `Registry`. 

985 

986 Returns 

987 ------- 

988 manager : `DatastoreRegistryBridgeManager` 

989 Object that mediates communication between this `Registry` and its 

990 associated datastores. 

991 """ 

992 raise NotImplementedError() 

993 

994 @abstractmethod 

995 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

996 """Retrieve datastore locations for a given dataset. 

997 

998 Parameters 

999 ---------- 

1000 ref : `DatasetRef` 

1001 A reference to the dataset for which to retrieve storage 

1002 information. 

1003 

1004 Returns 

1005 ------- 

1006 datastores : `Iterable` [ `str` ] 

1007 All the matching datastores holding this dataset. 

1008 

1009 Raises 

1010 ------ 

1011 AmbiguousDatasetError 

1012 Raised if ``ref.id`` is `None`. 

1013 """ 

1014 raise NotImplementedError() 

1015 

1016 @abstractmethod 

1017 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

1018 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

1019 withDefaults: bool = True, 

1020 **kwargs: Any) -> DataCoordinate: 

1021 """Expand a dimension-based data ID to include additional information. 

1022 

1023 Parameters 

1024 ---------- 

1025 dataId : `DataCoordinate` or `dict`, optional 

1026 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1027 graph : `DimensionGraph`, optional 

1028 Set of dimensions for the expanded ID. If `None`, the dimensions 

1029 will be inferred from the keys of ``dataId`` and ``kwargs``. 

1030 Dimensions that are in ``dataId`` or ``kwargs`` but not in 

1031 ``graph`` are silently ignored, providing a way to extract and 

1032 expand a subset of a data ID. 

1033 records : `Mapping` [`str`, `DimensionRecord`], optional 

1034 Dimension record data to use before querying the database for that 

1035 data, keyed by element name. 

1036 withDefaults : `bool`, optional 

1037 Utilize ``self.defaults.dataId`` to fill in missing governor 

1038 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1039 used). 

1040 **kwargs 

1041 Additional keywords are treated like additional key-value pairs for 

1042 ``dataId``, extending and overriding 

1043 

1044 Returns 

1045 ------- 

1046 expanded : `DataCoordinate` 

1047 A data ID that includes full metadata for all of the dimensions it 

1048 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1049 ``expanded.hasFull()`` both return `True`. 

1050 """ 

1051 raise NotImplementedError() 
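
A sketch of expanding a minimal data ID so implied dimension values and records are attached; the key-value pairs are hypothetical:

    expanded = registry.expandDataId(instrument="HSC", visit=903334, detector=16)
    assert expanded.hasFull() and expanded.hasRecords()
    # The expanded coordinate now carries the dimension records that, e.g.,
    # datastores need to build file templates.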

1052 

1053 @abstractmethod 

1054 def insertDimensionData(self, element: Union[DimensionElement, str], 

1055 *data: Union[Mapping[str, Any], DimensionRecord], 

1056 conform: bool = True, 

1057 replace: bool = False) -> None: 

1058 """Insert one or more dimension records into the database. 

1059 

1060 Parameters 

1061 ---------- 

1062 element : `DimensionElement` or `str` 

1063 The `DimensionElement` or name thereof that identifies the table 

1064 records will be inserted into. 

1065 data : `dict` or `DimensionRecord` (variadic) 

1066 One or more records to insert. 

1067 conform : `bool`, optional 

1068 If `False` (`True` is default) perform no checking or conversions, 

1069 and assume that ``element`` is a `DimensionElement` instance and 

1070 ``data`` is one or more `DimensionRecord` instances of the 

1071 appropriate subclass. 

1072 replace : `bool`, optional 

1073 If `True` (`False` is default), replace existing records in the 

1074 database if there is a conflict. 

1075 """ 

1076 raise NotImplementedError() 

1077 

1078 @abstractmethod 

1079 def syncDimensionData(self, element: Union[DimensionElement, str], 

1080 row: Union[Mapping[str, Any], DimensionRecord], 

1081 conform: bool = True, 

1082 update: bool = False) -> Union[bool, Dict[str, Any]]: 

1083 """Synchronize the given dimension record with the database, inserting 

1084 if it does not already exist and comparing values if it does. 

1085 

1086 Parameters 

1087 ---------- 

1088 element : `DimensionElement` or `str` 

1089 The `DimensionElement` or name thereof that identifies the table 

1090 records will be inserted into. 

1091 row : `dict` or `DimensionRecord` 

1092 The record to insert. 

1093 conform : `bool`, optional 

1094 If `False` (`True` is default) perform no checking or conversions, 

1095 and assume that ``element`` is a `DimensionElement` instance and 

1096 ``row`` is a `DimensionRecord` instance of the 

1097 appropriate subclass. 

1098 update : `bool`, optional 

1099 If `True` (`False` is default), update the existing record in the 

1100 database if there is a conflict. 

1101 

1102 Returns 

1103 ------- 

1104 inserted_or_updated : `bool` or `dict` 

1105 `True` if a new row was inserted, `False` if no changes were 

1106 needed, or a `dict` mapping updated column names to their old 

1107 values if an update was performed (only possible if 

1108 ``update=True``). 

1109 

1110 Raises 

1111 ------ 

1112 ConflictingDefinitionError 

1113 Raised if the record exists in the database (according to primary 

1114 key lookup) but is inconsistent with the given one. 

1115 """ 

1116 raise NotImplementedError() 
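
A sketch of inserting and then synchronizing a dimension record. "instrument" is a standard dimension, but the metadata fields shown are illustrative and must match the dimension configuration of the repository:

    row = {"name": "MyCam", "visit_max": 100000, "exposure_max": 100000,
           "detector_max": 4}
    registry.insertDimensionData("instrument", row)
    # syncDimensionData is idempotent: it returns False here because an
    # identical record already exists.
    changed = registry.syncDimensionData("instrument", row)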

1117 

1118 @abstractmethod 

1119 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None, 

1120 missing: Optional[List[str]] = None, 

1121 ) -> Iterator[DatasetType]: 

1122 """Iterate over the dataset types whose names match an expression. 

1123 

1124 Parameters 

1125 ---------- 

1126 expression : `Any`, optional 

1127 An expression that fully or partially identifies the dataset types 

1128 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1129 ``...`` can be used to return all dataset types, and is the 

1130 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1131 information. 

1132 components : `bool`, optional 

1133 If `True`, apply all expression patterns to component dataset type 

1134 names as well. If `False`, never apply patterns to components. 

1135 If `None` (default), apply patterns to components only if their 

1136 parent datasets were not matched by the expression. 

1137 Fully-specified component datasets (`str` or `DatasetType` 

1138 instances) are always included. 

1139 missing : `list` of `str`, optional 

1140 String dataset type names that were explicitly given (i.e. not 

1141 regular expression patterns) but not found will be appended to this 

1142 list, if it is provided. 

1143 

1144 Yields 

1145 ------ 

1146 datasetType : `DatasetType` 

1147 A `DatasetType` instance whose name matches ``expression``. 

1148 """ 

1149 raise NotImplementedError() 

1150 

1151 @abstractmethod 

1152 def queryCollections( 

1153 self, 

1154 expression: Any = ..., 

1155 datasetType: Optional[DatasetType] = None, 

1156 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

1157 flattenChains: bool = False, 

1158 includeChains: Optional[bool] = None, 

1159 ) -> Iterator[str]: 

1160 """Iterate over the collections whose names match an expression. 

1161 

1162 Parameters 

1163 ---------- 

1164 expression : `Any`, optional 

1165 An expression that identifies the collections to return, such as 

1166 a `str` (for full matches or partial matches via globs), 

1167 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1168 can be used to return all collections, and is the default. 

1169 See :ref:`daf_butler_collection_expressions` for more information. 

1170 datasetType : `DatasetType`, optional 

1171 If provided, only yield collections that may contain datasets of 

1172 this type. This is a conservative approximation in general; it may 

1173 yield collections that do not have any such datasets. 

1174 collectionTypes : `AbstractSet` [ `CollectionType` ] or \ 

1175 `CollectionType`, optional 

1176 If provided, only yield collections of these types. 

1177 flattenChains : `bool`, optional 

1178 If `True` (`False` is default), recursively yield the child 

1179 collections of matching `~CollectionType.CHAINED` collections. 

1180 includeChains : `bool`, optional 

1181 If `True`, yield records for matching `~CollectionType.CHAINED` 

1182 collections. Default is the opposite of ``flattenChains``: include 

1183 either CHAINED collections or their children, but not both. 

1184 

1185 Yields 

1186 ------ 

1187 collection : `str` 

1188 The name of a collection that matches ``expression``. 

1189 """ 

1190 raise NotImplementedError() 

1191 

1192 @abstractmethod 

1193 def queryDatasets(self, datasetType: Any, *, 

1194 collections: Any = None, 

1195 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1196 dataId: Optional[DataId] = None, 

1197 where: Optional[str] = None, 

1198 findFirst: bool = False, 

1199 components: Optional[bool] = None, 

1200 bind: Optional[Mapping[str, Any]] = None, 

1201 check: bool = True, 

1202 **kwargs: Any) -> Iterable[DatasetRef]: 

1203 """Query for and iterate over dataset references matching user-provided 

1204 criteria. 

1205 

1206 Parameters 

1207 ---------- 

1208 datasetType 

1209 An expression that fully or partially identifies the dataset types 

1210 to be queried. Allowed types include `DatasetType`, `str`, 

1211 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1212 be used to query all dataset types. See 

1213 :ref:`daf_butler_dataset_type_expressions` for more information. 

1214 collections : `Any`, optional 

1215 An expression that identifies the collections to search, such as a 

1216 `str` (for full matches or partial matches via globs), `re.Pattern` 

1217 (for partial matches), or iterable thereof. ``...`` can be used to 

1218 search all collections (actually just all `~CollectionType.RUN` 

1219 collections, because this will still find all datasets). 

1220 If not provided, ``self.default.collections`` is used. See 

1221 :ref:`daf_butler_collection_expressions` for more information. 

1222 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1223 Dimensions to include in the query (in addition to those used 

1224 to identify the queried dataset type(s)), either to constrain 

1225 the resulting datasets to those for which a matching dimension 

1226 exists, or to relate the dataset type's dimensions to dimensions 

1227 referenced by the ``dataId`` or ``where`` arguments. 

1228 dataId : `dict` or `DataCoordinate`, optional 

1229 A data ID whose key-value pairs are used as equality constraints 

1230 in the query. 

1231 where : `str`, optional 

1232 A string expression similar to a SQL WHERE clause. May involve 

1233 any column of a dimension table or (as a shortcut for the primary 

1234 key column of a dimension table) dimension name. See 

1235 :ref:`daf_butler_dimension_expressions` for more information. 

1236 findFirst : `bool`, optional 

1237 If `True` (`False` is default), for each result data ID, only 

1238 yield one `DatasetRef` of each `DatasetType`, from the first 

1239 collection in which a dataset of that dataset type appears 

1240 (according to the order of ``collections`` passed in). If `True`, 

1241 ``collections`` must not contain regular expressions and may not 

1242 be ``...``. 

1243 components : `bool`, optional 

1244 If `True`, apply all dataset expression patterns to component 

1245 dataset type names as well. If `False`, never apply patterns to 

1246 components. If `None` (default), apply patterns to components only 

1247 if their parent datasets were not matched by the expression. 

1248 Fully-specified component datasets (`str` or `DatasetType` 

1249 instances) are always included. 

1250 bind : `Mapping`, optional 

1251 Mapping containing literal values that should be injected into the 

1252 ``where`` expression, keyed by the identifiers they replace. 

1253 check : `bool`, optional 

1254 If `True` (default) check the query for consistency before 

1255 executing it. This may reject some valid queries that resemble 

1256 common mistakes (e.g. queries for visits without specifying an 

1257 instrument). 

1258 **kwargs 

1259 Additional keyword arguments are forwarded to 

1260 `DataCoordinate.standardize` when processing the ``dataId`` 

1261 argument (and may be used to provide a constraining data ID even 

1262 when the ``dataId`` argument is `None`). 

1263 

1264 Returns 

1265 ------- 

1266 refs : `queries.DatasetQueryResults` 

1267 Dataset references matching the given query criteria. Nested data 

1268 IDs are guaranteed to include values for all implied dimensions 

1269 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1270 include dimension records (`DataCoordinate.hasRecords` will be 

1271 `False`) unless `~queries.DatasetQueryResults.expanded` is called 

1272 on the result object (which returns a new one). 

1273 

1274 Raises 

1275 ------ 

1276 TypeError 

1277 Raised when the arguments are incompatible, such as when a 

1278 collection wildcard is passed when ``findFirst`` is `True`, or 

1279 when ``collections`` is `None` and ``self.defaults.collections`` is 

1280 also `None`. 

1281 

1282 Notes 

1283 ----- 

1284 When multiple dataset types are queried in a single call, the 

1285 results of this operation are equivalent to querying for each dataset 

1286 type separately in turn, and no information about the relationships 

1287 between datasets of different types is included. In contexts where 

1288 that kind of information is important, the recommended pattern is to 

1289 use `queryDataIds` to first obtain data IDs (possibly with the 

1290 desired dataset types and collections passed as constraints to the 

1291 query), and then use multiple (generally much simpler) calls to 

1292 `queryDatasets` with the returned data IDs passed as constraints. 

1293 """ 

1294 raise NotImplementedError() 
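
A sketch of querying datasets with a dimension expression and bind parameters; dataset type, collections, and values are hypothetical:

    refs = registry.queryDatasets(
        "calexp",
        collections=["u/example/run"],
        where="instrument = 'HSC' AND visit = my_visit",
        bind={"my_visit": 903334},
        findFirst=True,
    )
    for ref in refs:
        print(ref.datasetType.name, ref.dataId)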

1295 

1296 @abstractmethod 

1297 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1298 dataId: Optional[DataId] = None, 

1299 datasets: Any = None, 

1300 collections: Any = None, 

1301 where: Optional[str] = None, 

1302 components: Optional[bool] = None, 

1303 bind: Optional[Mapping[str, Any]] = None, 

1304 check: bool = True, 

1305 **kwargs: Any) -> DataCoordinateIterable: 

1306 """Query for data IDs matching user-provided criteria. 

1307 

1308 Parameters 

1309 ---------- 

1310 dimensions : `Dimension` or `str`, or iterable thereof 

1311 The dimensions of the data IDs to yield, as either `Dimension` 

1312 instances or `str`. Will be automatically expanded to a complete 

1313 `DimensionGraph`. 

1314 dataId : `dict` or `DataCoordinate`, optional 

1315 A data ID whose key-value pairs are used as equality constraints 

1316 in the query. 

1317 datasets : `Any`, optional 

1318 An expression that fully or partially identifies dataset types 

1319 that should constrain the yielded data IDs. For example, including 

1320 "raw" here would constrain the yielded ``instrument``, 

1321 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1322 those for which at least one "raw" dataset exists in 

1323 ``collections``. Allowed types include `DatasetType`, `str`, 

1324 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1325 expressions, ``...`` is not permitted - it doesn't make sense to 

1326 constrain data IDs on the existence of *all* datasets. 

1327 See :ref:`daf_butler_dataset_type_expressions` for more 

1328 information. 

1329 collections : `Any`, optional 

1330 An expression that identifies the collections to search for 

1331 datasets, such as a `str` (for full matches or partial matches 

1332 via globs), `re.Pattern` (for partial matches), or iterable 

1333 thereof. ``...`` can be used to search all collections (actually 

1334 just all `~CollectionType.RUN` collections, because this will 

1335 still find all datasets). If not provided, 

1336 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1337 is also passed. See :ref:`daf_butler_collection_expressions` for 

1338 more information. 

1339 where : `str`, optional 

1340 A string expression similar to a SQL WHERE clause. May involve 

1341 any column of a dimension table or (as a shortcut for the primary 

1342 key column of a dimension table) dimension name. See 

1343 :ref:`daf_butler_dimension_expressions` for more information. 

1344 components : `bool`, optional 

1345 If `True`, apply all dataset expression patterns to component 

1346 dataset type names as well. If `False`, never apply patterns to 

1347 components. If `None` (default), apply patterns to components only 

1348 if their parent datasets were not matched by the expression. 

1349 Fully-specified component datasets (`str` or `DatasetType` 

1350 instances) are always included. 

1351 bind : `Mapping`, optional 

1352 Mapping containing literal values that should be injected into the 

1353 ``where`` expression, keyed by the identifiers they replace. 

1354 check : `bool`, optional 

1355 If `True` (default) check the query for consistency before 

1356 executing it. This may reject some valid queries that resemble 

1357 common mistakes (e.g. queries for visits without specifying an 

1358 instrument). 

1359 **kwargs 

1360 Additional keyword arguments are forwarded to 

1361 `DataCoordinate.standardize` when processing the ``dataId`` 

1362 argument (and may be used to provide a constraining data ID even 

1363 when the ``dataId`` argument is `None`). 

1364 

1365 Returns 

1366 ------- 

1367 dataIds : `DataCoordinateQueryResults` 

1368 Data IDs matching the given query parameters. These are guaranteed 

1369 to identify all dimensions (`DataCoordinate.hasFull` returns 

1370 `True`), but will not contain `DimensionRecord` objects 

1371 (`DataCoordinate.hasRecords` returns `False`). Call 

1372 `DataCoordinateQueryResults.expanded` on the returned object to 

1373 fetch those (and consider using 

1374 `DataCoordinateQueryResults.materialize` on the returned object 

1375 first if the expected number of rows is very large). See 

1376 documentation for those methods for additional information. 

1377 

1378 Raises 

1379 ------ 

1380 TypeError 

1381 Raised if ``collections`` is `None`, ``self.defaults.collections`` 

1382 is `None`, and ``datasets`` is not `None`. 

1383 """ 

1384 raise NotImplementedError() 
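
A sketch of querying data IDs constrained by the existence of a dataset, following the pattern recommended in the `queryDatasets` notes; names are hypothetical:

    dataIds = registry.queryDataIds(
        ["visit", "detector"],
        datasets="raw",
        collections=["u/example/raw"],
        where="instrument = 'HSC' AND visit > 900000",
    )
    # Call expanded() (and possibly materialize() first, for large results)
    # to attach dimension records to each returned data ID.
    for dataId in dataIds.expanded():
        print(dataId)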

1385 

1386 @abstractmethod 

1387 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1388 dataId: Optional[DataId] = None, 

1389 datasets: Any = None, 

1390 collections: Any = None, 

1391 where: Optional[str] = None, 

1392 components: Optional[bool] = None, 

1393 bind: Optional[Mapping[str, Any]] = None, 

1394 check: bool = True, 

1395 **kwargs: Any) -> Iterable[DimensionRecord]: 

1396 """Query for dimension information matching user-provided criteria. 

1397 

1398 Parameters 

1399 ---------- 

1400 element : `DimensionElement` or `str` 

1401 The dimension element to obtain records for. 

1402 dataId : `dict` or `DataCoordinate`, optional 

1403 A data ID whose key-value pairs are used as equality constraints 

1404 in the query. 

1405 datasets : `Any`, optional 

1406 An expression that fully or partially identifies dataset types 

1407 that should constrain the yielded records. See `queryDataIds` and 

1408 :ref:`daf_butler_dataset_type_expressions` for more information. 

1409 collections : `Any`, optional 

1410 An expression that identifies the collections to search for 

1411 datasets, such as a `str` (for full matches or partial matches 

1412 via globs), `re.Pattern` (for partial matches), or iterable 

1413 thereof. ``...`` can be used to search all collections (actually 

1414 just all `~CollectionType.RUN` collections, because this will 

1415 still find all datasets). If not provided, 

1416 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1417 is also passed. See :ref:`daf_butler_collection_expressions` for 

1418 more information. 

1419 where : `str`, optional 

1420 A string expression similar to a SQL WHERE clause. See 

1421 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1422 information. 

1423 components : `bool`, optional 

1424 Whether to apply dataset expressions to components as well. 

1425 See `queryDataIds` for more information. 

1426 bind : `Mapping`, optional 

1427 Mapping containing literal values that should be injected into the 

1428 ``where`` expression, keyed by the identifiers they replace. 

1429 check : `bool`, optional 

1430 If `True` (default) check the query for consistency before 

1431 executing it. This may reject some valid queries that resemble 

1432 common mistakes (e.g. queries for visits without specifying an 

1433 instrument). 

1434 **kwargs 

1435 Additional keyword arguments are forwarded to 

1436 `DataCoordinate.standardize` when processing the ``dataId`` 

1437 argument (and may be used to provide a constraining data ID even 

1438 when the ``dataId`` argument is `None`). 

1439 

1440 Returns 

1441 ------- 

1442 records : `Iterator` [ `DimensionRecord` ] 

1443 Dimension records matching the given query parameters. 

1444 """ 

1445 raise NotImplementedError() 
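
A sketch of retrieving dimension records directly, here all detector records for one (hypothetical) instrument:

    records = registry.queryDimensionRecords("detector", dataId={"instrument": "HSC"})
    for record in records:
        print(record)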

1446 

1447 @abstractmethod 

1448 def queryDatasetAssociations( 

1449 self, 

1450 datasetType: Union[str, DatasetType], 

1451 collections: Any = ..., 

1452 *, 

1453 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1454 flattenChains: bool = False, 

1455 ) -> Iterator[DatasetAssociation]: 

1456 """Iterate over dataset-collection combinations where the dataset is in 

1457 the collection. 

1458 

1459 This method is a temporary placeholder for better support for 

1460 association results in `queryDatasets`. It will probably be 

1461 removed in the future, and should be avoided in production code 

1462 whenever possible. 

1463 

1464 Parameters 

1465 ---------- 

1466 datasetType : `DatasetType` or `str` 

1467 A dataset type object or the name of one. 

1468 collections : `Any`, optional 

1469 An expression that identifies the collections to search for 

1470 datasets, such as a `str` (for full matches or partial matches 

1471 via globs), `re.Pattern` (for partial matches), or iterable 

1472 thereof. ``...`` can be used to search all collections (actually 

1473 just all `~CollectionType.RUN` collections, because this will still 

1474 find all datasets). If not provided, ``self.default.collections`` 

1475 is used. See :ref:`daf_butler_collection_expressions` for more 

1476 information. 

1477 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1478 If provided, only yield associations from collections of these 

1479 types. 

1480 flattenChains : `bool`, optional 

1481 If `True` (default) search in the children of 

1482 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED`` 

1483 collections are ignored. 

1484 

1485 Yields 

1486 ------ 

1487 association : `DatasetAssociation` 

1488 Object representing the relationship between a single dataset and 

1489 a single collection. 

1490 

1491 Raises 

1492 ------ 

1493 TypeError 

1494 Raised if ``collections`` is `None` and 

1495 ``self.defaults.collections`` is `None`. 

1496 """ 

1497 raise NotImplementedError() 

1498 

1499 storageClasses: StorageClassFactory 

1500 """All storage classes known to the registry (`StorageClassFactory`). 

1501 """