Coverage for python/lsst/daf/butler/registry/_registry.py: 62%

174 statements  

coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from abc import ABC, abstractmethod 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Tuple, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45from lsst.utils import doImport 

46 

47from ..core import ( 

48 ButlerURI, 

49 Config, 

50 DataCoordinate, 

51 DataCoordinateIterable, 

52 DataId, 

53 DatasetAssociation, 

54 DatasetId, 

55 DatasetRef, 

56 DatasetType, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionRecord, 

62 DimensionUniverse, 

63 NameLookupMapping, 

64 StorageClassFactory, 

65 Timespan, 

66) 

67 

68from ._config import RegistryConfig 

69from ._collectionType import CollectionType 

70from ._defaults import RegistryDefaults 

71from .interfaces import DatasetIdGenEnum 

72from .wildcards import CollectionSearch 

73from .summaries import CollectionSummary 

74 

75if TYPE_CHECKING:  # coverage annotation: 75 ↛ 76 (the condition on line 75 was never true)

76 from .._butlerConfig import ButlerConfig 

77 from .interfaces import ( 

78 CollectionRecord, 

79 DatastoreRegistryBridgeManager, 

80 ) 

81 

82_LOG = logging.getLogger(__name__) 

83 

84 

85class Registry(ABC): 

86 """Abstract Registry interface. 

87 

88 Each registry implementation can have its own constructor parameters. 

89 The assumption is that an instance of a specific subclass will be 

90 constructed from configuration using `Registry.fromConfig()`. 

91 The base class will look for a ``cls`` entry and call that specific 

92 `fromConfig()` method. 

93 

94 All subclasses should store `RegistryDefaults` in a ``_defaults`` 

95 property. No other properties are assumed shared between implementations. 

96 """ 

97 

98 defaultConfigFile: Optional[str] = None 

99 """Path to configuration defaults. Accessed within the ``configs`` resource 

100 or relative to a search path. Can be None if no defaults specified. 

101 """ 

102 

103 @classmethod 

104 def forceRegistryConfig(cls, config: Optional[Union[ButlerConfig, 

105 RegistryConfig, Config, str]]) -> RegistryConfig: 

106 """Force the supplied config to a `RegistryConfig`. 

107 

108 Parameters 

109 ---------- 

110 config : `RegistryConfig`, `Config` or `str` or `None` 

111 Registry configuration, if missing then default configuration will 

112 be loaded from registry.yaml. 

113 

114 Returns 

115 ------- 

116 registry_config : `RegistryConfig` 

117 A registry config. 

118 """ 

119 if not isinstance(config, RegistryConfig): 

120 if isinstance(config, (str, Config)) or config is None: 

121 config = RegistryConfig(config) 

122 else: 

123 raise ValueError(f"Incompatible Registry configuration: {config}") 

124 return config 

125 

126 @classmethod 

127 def determineTrampoline(cls, 

128 config: Optional[Union[ButlerConfig, 

129 RegistryConfig, 

130 Config, 

131 str]]) -> Tuple[Type[Registry], RegistryConfig]: 

132 """Return class to use to instantiate real registry. 

133 

134 Parameters 

135 ---------- 

136 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`, optional 

137 Registry configuration, if missing then default configuration will 

138 be loaded from registry.yaml. 

139 

140 Returns 

141 ------- 

142 requested_cls : `type` of `Registry` 

143 The real registry class to use. 

144 registry_config : `RegistryConfig` 

145 The `RegistryConfig` to use. 

146 """ 

147 config = cls.forceRegistryConfig(config) 

148 

149 # Default to the standard registry 

150 registry_cls = doImport(config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")) 

151 if registry_cls is cls: 

152 raise ValueError("Can not instantiate the abstract base Registry from config") 

153 return registry_cls, config 

154 

155 @classmethod 

156 def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None, 

157 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

158 butlerRoot: Optional[str] = None) -> Registry: 

159 """Create registry database and return `Registry` instance. 

160 

161 This method initializes database contents, database must be empty 

162 prior to calling this method. 

163 

164 Parameters 

165 ---------- 

166 config : `RegistryConfig` or `str`, optional 

167 Registry configuration, if missing then default configuration will 

168 be loaded from registry.yaml. 

169 dimensionConfig : `DimensionConfig` or `str`, optional 

170 Dimensions configuration, if missing then default configuration 

171 will be loaded from dimensions.yaml. 

172 butlerRoot : `str`, optional 

173 Path to the repository root this `Registry` will manage. 

174 

175 Returns 

176 ------- 

177 registry : `Registry` 

178 A new `Registry` instance. 

179 

180 Notes 

181 ----- 

182 This class will determine the concrete `Registry` subclass to 

183 use from configuration. Each subclass should implement this method 

184 even if it can not create a registry. 

185 """ 

186 registry_cls, registry_config = cls.determineTrampoline(config) 

187 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) 

188 

189 @classmethod 

190 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], 

191 butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True, 

192 defaults: Optional[RegistryDefaults] = None) -> Registry: 

193 """Create `Registry` subclass instance from `config`. 

194 

195 Registry database must be initialized prior to calling this method. 

196 

197 Parameters 

198 ---------- 

199 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

200 Registry configuration 

201 butlerRoot : `str` or `ButlerURI`, optional 

202 Path to the repository root this `Registry` will manage. 

203 writeable : `bool`, optional 

204 If `True` (default) create a read-write connection to the database. 

205 defaults : `RegistryDefaults`, optional 

206 Default collection search path and/or output `~CollectionType.RUN` 

207 collection. 

208 

209 Returns 

210 ------- 

211 registry : `Registry` (subclass) 

212 A new `Registry` subclass instance. 

213 

214 Notes 

215 ----- 

216 This class will determine the concrete `Registry` subclass to 

217 use from configuration. Each subclass should implement this method. 

218 """ 

219 # The base class implementation should trampoline to the correct 

220 # subclass. No implementation should ever use this implementation 

221 # directly. If no class is specified, default to the standard 

222 # registry. 

223 registry_cls, registry_config = cls.determineTrampoline(config) 

224 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) 

225 
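# --- Usage sketch (illustrative; not part of the coverage listing) ---
# A minimal example of the trampoline described above. The config path is a
# hypothetical placeholder, and the import paths are assumed from this
# package layout:
#
#     from lsst.daf.butler.registry import Registry, RegistryConfig
#
#     config = RegistryConfig("repo/registry.yaml")
#     registry = Registry.fromConfig(config, writeable=False)
#     assert not registry.isWriteable()
#
# `fromConfig` reads the ``cls`` entry from the config (defaulting to
# SqlRegistry) and delegates to that subclass's own `fromConfig`.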

226 @abstractmethod 

227 def isWriteable(self) -> bool: 

228 """Return `True` if this registry allows write operations, and `False` 

229 otherwise. 

230 """ 

231 raise NotImplementedError() 

232 

233 @abstractmethod 

234 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

235 """Create a new `Registry` backed by the same data repository and 

236 connection as this one, but independent defaults. 

237 

238 Parameters 

239 ---------- 

240 defaults : `RegistryDefaults`, optional 

241 Default collections and data ID values for the new registry. If 

242 not provided, ``self.defaults`` will be used (but future changes 

243 to either registry's defaults will not affect the other). 

244 

245 Returns 

246 ------- 

247 copy : `Registry` 

248 A new `Registry` instance with its own defaults. 

249 

250 Notes 

251 ----- 

252 Because the new registry shares a connection with the original, they 

253 also share transaction state (despite the fact that their `transaction` 

254 context manager methods do not reflect this), and must be used with 

255 care. 

256 """ 

257 raise NotImplementedError() 

258 

259 @property 

260 @abstractmethod 

261 def dimensions(self) -> DimensionUniverse: 

262 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

263 """ 

264 raise NotImplementedError() 

265 

266 @property 

267 def defaults(self) -> RegistryDefaults: 

268 """Default collection search path and/or output `~CollectionType.RUN` 

269 collection (`RegistryDefaults`). 

270 

271 This is an immutable struct whose components may not be set 

272 individually, but the entire struct can be set by assigning to this 

273 property. 

274 """ 

275 return self._defaults 

276 

277 @defaults.setter 

278 def defaults(self, value: RegistryDefaults) -> None: 

279 if value.run is not None: 

280 self.registerRun(value.run) 

281 value.finish(self) 

282 self._defaults = value 

283 

284 @abstractmethod 

285 def refresh(self) -> None: 

286 """Refresh all in-memory state by querying the database. 

287 

288 This may be necessary to enable querying for entities added by other 

289 registry instances after this one was constructed. 

290 """ 

291 raise NotImplementedError() 

292 

293 @contextlib.contextmanager 

294 @abstractmethod 

295 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

296 """Return a context manager that represents a transaction. 

297 """ 

298 raise NotImplementedError() 

299 
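# Sketch of the transaction context manager in use (illustrative only;
# ``record``, ``data_id``, and ``run`` are hypothetical objects). Writes
# inside the block commit together or roll back if an exception escapes:
#
#     with registry.transaction(savepoint=True):
#         registry.insertDimensionData("exposure", record)
#         registry.insertDatasets("raw", dataIds=[data_id], run=run)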

300 def resetConnectionPool(self) -> None: 

301 """Reset connection pool for registry if relevant. 

302 

303 This operation can be used to reset connections to servers when 

304 using registry with fork-based multiprocessing. This method should 

305 usually be called by the child process immediately 

306 after the fork. 

307 

308 The base class implementation is a no-op. 

309 """ 

310 pass 

311 
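# Illustrative sketch: with fork-based multiprocessing, each child should
# drop pooled connections inherited from the parent before touching the
# registry (the setup here is hypothetical; ``registry`` is assumed to be
# inherited from the parent via fork):
#
#     def worker(ref_id):
#         registry.resetConnectionPool()   # call first thing in the child
#         return registry.getDataset(ref_id)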

312 @abstractmethod 

313 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED, 

314 doc: Optional[str] = None) -> bool: 

315 """Add a new collection if one with the given name does not exist. 

316 

317 Parameters 

318 ---------- 

319 name : `str` 

320 The name of the collection to create. 

321 type : `CollectionType` 

322 Enum value indicating the type of collection to create. 

323 doc : `str`, optional 

324 Documentation string for the collection. 

325 

326 Returns 

327 ------- 

328 registered : `bool` 

329 `True` if the collection was created by this call, `False` if it 

330 already existed. 

331 

332 Notes 

333 ----- 

334 This method cannot be called within transactions, as it needs to be 

335 able to perform its own transaction to be concurrent. 

336 """ 

337 raise NotImplementedError() 

338 

339 @abstractmethod 

340 def getCollectionType(self, name: str) -> CollectionType: 

341 """Return an enumeration value indicating the type of the given 

342 collection. 

343 

344 Parameters 

345 ---------- 

346 name : `str` 

347 The name of the collection. 

348 

349 Returns 

350 ------- 

351 type : `CollectionType` 

352 Enum value indicating the type of this collection. 

353 

354 Raises 

355 ------ 

356 MissingCollectionError 

357 Raised if no collection with the given name exists. 

358 """ 

359 raise NotImplementedError() 

360 

361 @abstractmethod 

362 def _get_collection_record(self, name: str) -> CollectionRecord: 

363 """Return the record for this collection. 

364 

365 Parameters 

366 ---------- 

367 name : `str` 

368 Name of the collection for which the record is to be retrieved. 

369 

370 Returns 

371 ------- 

372 record : `CollectionRecord` 

373 The record for this collection. 

374 """ 

375 raise NotImplementedError() 

376 

377 @abstractmethod 

378 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

379 """Add a new run if one with the given name does not exist. 

380 

381 Parameters 

382 ---------- 

383 name : `str` 

384 The name of the run to create. 

385 doc : `str`, optional 

386 Documentation string for the collection. 

387 

388 Returns 

389 ------- 

390 registered : `bool` 

391 Boolean indicating whether a new run was registered. `False` 

392 if it already existed. 

393 

394 Notes 

395 ----- 

396 This method cannot be called within transactions, as it needs to be 

397 able to perform its own transaction to be concurrent. 

398 """ 

399 raise NotImplementedError() 

400 

401 @abstractmethod 

402 def removeCollection(self, name: str) -> None: 

403 """Completely remove the given collection. 

404 

405 Parameters 

406 ---------- 

407 name : `str` 

408 The name of the collection to remove. 

409 

410 Raises 

411 ------ 

412 MissingCollectionError 

413 Raised if no collection with the given name exists. 

414 

415 Notes 

416 ----- 

417 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

418 in it are also fully removed. This requires that those datasets be 

419 removed (or at least trashed) from any datastores that hold them first. 

420 

421 A collection may not be deleted as long as it is referenced by a 

422 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

423 be deleted or redefined first. 

424 """ 

425 raise NotImplementedError() 

426 

427 @abstractmethod 

428 def getCollectionChain(self, parent: str) -> CollectionSearch: 

429 """Return the child collections in a `~CollectionType.CHAINED` 

430 collection. 

431 

432 Parameters 

433 ---------- 

434 parent : `str` 

435 Name of the chained collection. Must have already been added via 

436 a call to `Registry.registerCollection`. 

437 

438 Returns 

439 ------- 

440 children : `CollectionSearch` 

441 An object that defines the search path of the collection. 

442 See :ref:`daf_butler_collection_expressions` for more information. 

443 

444 Raises 

445 ------ 

446 MissingCollectionError 

447 Raised if ``parent`` does not exist in the `Registry`. 

448 TypeError 

449 Raised if ``parent`` does not correspond to a 

450 `~CollectionType.CHAINED` collection. 

451 """ 

452 raise NotImplementedError() 

453 

454 @abstractmethod 

455 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

456 """Define or redefine a `~CollectionType.CHAINED` collection. 

457 

458 Parameters 

459 ---------- 

460 parent : `str` 

461 Name of the chained collection. Must have already been added via 

462 a call to `Registry.registerCollection`. 

463 children : `Any` 

464 An expression defining an ordered search of child collections, 

465 generally an iterable of `str`; see 

466 :ref:`daf_butler_collection_expressions` for more information. 

467 flatten : `bool`, optional 

468 If `True` (`False` is default), recursively flatten out any nested 

469 `~CollectionType.CHAINED` collections in ``children`` first. 

470 

471 Raises 

472 ------ 

473 MissingCollectionError 

474 Raised when any of the given collections do not exist in the 

475 `Registry`. 

476 TypeError 

477 Raised if ``parent`` does not correspond to a 

478 `~CollectionType.CHAINED` collection. 

479 ValueError 

480 Raised if the given collections contain a cycle. 

481 """ 

482 raise NotImplementedError() 

483 
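# Illustrative sketch combining the collection APIs above; all collection
# names are hypothetical, and `CollectionType` is assumed importable from
# this package:
#
#     from lsst.daf.butler.registry import CollectionType
#
#     registry.registerRun("u/alice/run1")
#     registry.registerCollection("u/alice/tagged", CollectionType.TAGGED)
#     registry.registerCollection("u/alice/chain", CollectionType.CHAINED)
#     registry.setCollectionChain("u/alice/chain",
#                                 ["u/alice/run1", "u/alice/tagged"])
#     children = registry.getCollectionChain("u/alice/chain")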

484 @abstractmethod 

485 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

486 """Retrieve the documentation string for a collection. 

487 

488 Parameters 

489 ---------- 

490 collection : `str` 

491 Name of the collection. 

492 

493 Returns 

494 ------- 

495 docs : `str` or `None` 

496 Docstring for the collection with the given name. 

497 """ 

498 raise NotImplementedError() 

499 

500 @abstractmethod 

501 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

502 """Set the documentation string for a collection. 

503 

504 Parameters 

505 ---------- 

506 collection : `str` 

507 Name of the collection. 

508 doc : `str` or `None` 

509 Docstring for the collection with the given name; will replace any 

510 existing docstring. Passing `None` will remove any existing 

511 docstring. 

512 """ 

513 raise NotImplementedError() 

514 

515 @abstractmethod 

516 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

517 """Return a summary for the given collection. 

518 

519 Parameters 

520 ---------- 

521 collection : `str` 

522 Name of the collection for which a summary is to be retrieved. 

523 

524 Returns 

525 ------- 

526 summary : `CollectionSummary` 

527 Summary of the dataset types and governor dimension values in 

528 this collection. 

529 """ 

530 raise NotImplementedError() 

531 

532 @abstractmethod 

533 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

534 """ 

535 Add a new `DatasetType` to the Registry. 

536 

537 It is not an error to register the same `DatasetType` twice. 

538 

539 Parameters 

540 ---------- 

541 datasetType : `DatasetType` 

542 The `DatasetType` to be added. 

543 

544 Returns 

545 ------- 

546 inserted : `bool` 

547 `True` if ``datasetType`` was inserted, `False` if an identical 

548 existing `DatasetType` was found. Note that in either case the 

549 DatasetType is guaranteed to be defined in the Registry 

550 consistently with the given definition. 

551 

552 Raises 

553 ------ 

554 ValueError 

555 Raised if the dimensions or storage class are invalid. 

556 ConflictingDefinitionError 

557 Raised if this DatasetType is already registered with a different 

558 definition. 

559 

560 Notes 

561 ----- 

562 This method cannot be called within transactions, as it needs to be 

563 able to perform its own transaction to be concurrent. 

564 """ 

565 raise NotImplementedError() 

566 
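# Illustrative sketch of registering a dataset type. The name, dimensions,
# and storage class are hypothetical and must exist in the repository's
# dimension universe and storage class factory:
#
#     from lsst.daf.butler import DatasetType
#
#     dt = DatasetType("deepCoadd_example",
#                      dimensions=("tract", "patch", "band"),
#                      storageClass="ExposureF",
#                      universe=registry.dimensions)
#     inserted = registry.registerDatasetType(dt)  # False if already present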

567 @abstractmethod 

568 def removeDatasetType(self, name: str) -> None: 

569 """Remove the named `DatasetType` from the registry. 

570 

571 .. warning:: 

572 

573 Registry implementations can cache the dataset type definitions. 

574 This means that deleting the dataset type definition may result in 

575 unexpected behavior from other active butler processes that have 

576 not seen the deletion. 

577 

578 Parameters 

579 ---------- 

580 name : `str` 

581 Name of the type to be removed. 

582 

583 Raises 

584 ------ 

585 lsst.daf.butler.registry.OrphanedRecordError 

586 Raised if an attempt is made to remove the dataset type definition 

587 when there are already datasets associated with it. 

588 

589 Notes 

590 ----- 

591 If the dataset type is not registered the method will return without 

592 action. 

593 """ 

594 raise NotImplementedError() 

595 

596 @abstractmethod 

597 def getDatasetType(self, name: str) -> DatasetType: 

598 """Get the `DatasetType`. 

599 

600 Parameters 

601 ---------- 

602 name : `str` 

603 Name of the type. 

604 

605 Returns 

606 ------- 

607 type : `DatasetType` 

608 The `DatasetType` associated with the given name. 

609 

610 Raises 

611 ------ 

612 KeyError 

613 Raised if the requested `DatasetType` could not be found in the registry. 

614 """ 

615 raise NotImplementedError() 

616 

617 @abstractmethod 

618 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

619 """Test whether the given dataset ID generation mode is supported by 

620 `insertDatasets`. 

621 

622 Parameters 

623 ---------- 

624 mode : `DatasetIdGenEnum` 

625 Enum value for the mode to test. 

626 

627 Returns 

628 ------- 

629 supported : `bool` 

630 Whether the given mode is supported. 

631 """ 

632 raise NotImplementedError() 

633 

634 @abstractmethod 

635 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

636 collections: Any = None, timespan: Optional[Timespan] = None, 

637 **kwargs: Any) -> Optional[DatasetRef]: 

638 """Find a dataset given its `DatasetType` and data ID. 

639 

640 This can be used to obtain a `DatasetRef` that permits the dataset to 

641 be read from a `Datastore`. If the dataset is a component and can not 

642 be found using the provided dataset type, a dataset ref for the parent 

643 will be returned instead but with the correct dataset type. 

644 

645 Parameters 

646 ---------- 

647 datasetType : `DatasetType` or `str` 

648 A `DatasetType` or the name of one. 

649 dataId : `dict` or `DataCoordinate`, optional 

650 A `dict`-like object containing the `Dimension` links that identify 

651 the dataset within a collection. 

652 collections : `Any`, optional 

653 An expression that fully or partially identifies the collections to 

654 search for the dataset; see 

655 :ref:`daf_butler_collection_expressions` for more information. 

656 Defaults to ``self.defaults.collections``. 

657 timespan : `Timespan`, optional 

658 A timespan that the validity range of the dataset must overlap. 

659 If not provided, any `~CollectionType.CALIBRATION` collections 

660 matched by the ``collections`` argument will not be searched. 

661 **kwargs 

662 Additional keyword arguments passed to 

663 `DataCoordinate.standardize` to convert ``dataId`` to a true 

664 `DataCoordinate` or augment an existing one. 

665 

666 Returns 

667 ------- 

668 ref : `DatasetRef` or `None` 

669 A reference to the dataset, or `None` if no matching Dataset 

670 was found. 

671 

672 Raises 

673 ------ 

674 TypeError 

675 Raised if ``collections`` is `None` and 

676 ``self.defaults.collections`` is `None`. 

677 LookupError 

678 Raised if one or more data ID keys are missing. 

679 KeyError 

680 Raised if the dataset type does not exist. 

681 MissingCollectionError 

682 Raised if any of ``collections`` does not exist in the registry. 

683 

684 Notes 

685 ----- 

686 This method simply returns `None` and does not raise an exception even 

687 when the set of collections searched is intrinsically incompatible with 

688 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

689 only `~CollectionType.CALIBRATION` collections are being searched. 

690 This may make it harder to debug some lookup failures, but the behavior 

691 is intentional; we consider it more important that failed searches are 

692 reported consistently, regardless of the reason, and that adding 

693 additional collections that do not contain a match to the search path 

694 never changes the behavior. 

695 """ 

696 raise NotImplementedError() 

697 
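# Illustrative sketch: look up a single calibration-like dataset. The
# dataset type, dimension values, and collection name are hypothetical:
#
#     ref = registry.findDataset("flat",
#                                instrument="HypoCam", detector=12,
#                                physical_filter="g",
#                                collections=["HypoCam/calib/run1"])
#     if ref is None:
#         ...  # no match; see the Notes above for why nothing is raised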

698 @abstractmethod 

699 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

700 run: Optional[str] = None, expand: bool = True, 

701 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE) -> List[DatasetRef]: 

702 """Insert one or more datasets into the `Registry` 

703 

704 This always adds new datasets; to associate existing datasets with 

705 a new collection, use ``associate``. 

706 

707 Parameters 

708 ---------- 

709 datasetType : `DatasetType` or `str` 

710 A `DatasetType` or the name of one. 

711 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

712 Dimension-based identifiers for the new datasets. 

713 run : `str`, optional 

714 The name of the run that produced the datasets. Defaults to 

715 ``self.defaults.run``. 

716 expand : `bool`, optional 

717 If `True` (default), expand data IDs as they are inserted. This is 

718 necessary in general to allow datastore to generate file templates, 

719 but it may be disabled if the caller can guarantee this is 

720 unnecessary. 

721 idGenerationMode : `DatasetIdGenEnum`, optional 

722 Specifies option for generating dataset IDs. By default unique IDs 

723 are generated for each inserted dataset. 

724 

725 Returns 

726 ------- 

727 refs : `list` of `DatasetRef` 

728 Resolved `DatasetRef` instances for all given data IDs (in the same 

729 order). 

730 

731 Raises 

732 ------ 

733 TypeError 

734 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

735 ConflictingDefinitionError 

736 If a dataset with the same dataset type and data ID as one of those 

737 given already exists in ``run``. 

738 MissingCollectionError 

739 Raised if ``run`` does not exist in the registry. 

740 """ 

741 raise NotImplementedError() 

742 
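# Illustrative sketch (hypothetical dataset type, data ID, and run name):
#
#     refs = registry.insertDatasets(
#         "raw",
#         dataIds=[{"instrument": "HypoCam", "exposure": 101, "detector": 12}],
#         run="HypoCam/raw/run1",
#     )
#     # refs[0] is a resolved DatasetRef for the inserted dataset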

743 @abstractmethod 

744 def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True, 

745 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

746 reuseIds: bool = False) -> List[DatasetRef]: 

747 """Import one or more datasets into the `Registry`. 

748 

749 The difference from `insertDatasets` is that this method accepts 

750 `DatasetRef` instances, which should already be resolved and have a 

751 dataset ID. If the registry supports globally-unique dataset IDs (e.g. 

752 `uuid.UUID`) then datasets which already exist in the registry will be 

753 ignored if imported again. 

754 

755 Parameters 

756 ---------- 

757 datasets : `~collections.abc.Iterable` of `DatasetRef` 

758 Datasets to be inserted. All `DatasetRef` instances must have 

759 identical ``datasetType`` and ``run`` attributes. ``run`` 

760 attribute can be `None` and defaults to ``self.defaults.run``. 

761 Datasets may specify the ``id`` attribute, which will be used for the 

762 inserted datasets. All dataset IDs must have the same type 

763 (`int` or `uuid.UUID`); if the ID type does not match the 

764 configured backend, the IDs will be ignored and new IDs will be 

765 generated by the backend. 

766 expand : `bool`, optional 

767 If `True` (default), expand data IDs as they are inserted. This is 

768 necessary in general to allow datastore to generate file templates, 

769 but it may be disabled if the caller can guarantee this is 

770 unnecessary. 

771 idGenerationMode : `DatasetIdGenEnum`, optional 

772 Specifies option for generating dataset IDs when IDs are not 

773 provided or their type does not match backend type. By default 

774 unique IDs are generated for each inserted dataset. 

775 reuseIds : `bool`, optional 

776 If `True`, force re-use of imported dataset IDs for integer 

777 IDs, which are normally auto-incremented; an exception 

778 will be raised if imported IDs clash with existing ones. This 

779 option has no effect on the use of globally-unique IDs which are 

780 always re-used (or generated if integer IDs are being imported). 

781 

782 Returns 

783 ------- 

784 refs : `list` of `DatasetRef` 

785 Resolved `DatasetRef` instances for all given data IDs (in the same 

786 order). If any of ``datasets`` has an ID which already exists in 

787 the database then it will not be inserted or updated, but a 

788 resolved `DatasetRef` will be returned for it in any case. 

789 

790 Raises 

791 ------ 

792 TypeError 

793 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

794 ConflictingDefinitionError 

795 If a dataset with the same dataset type and data ID as one of those 

796 given already exists in ``run``. 

797 MissingCollectionError 

798 Raised if ``run`` does not exist in the registry. 

799 

800 Notes 

801 ----- 

802 This method is considered package-private and internal to Butler 

803 implementation. Clients outside daf_butler package should not use this 

804 method. 

805 """ 

806 raise NotImplementedError() 

807 

808 @abstractmethod 

809 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

810 """Retrieve a Dataset entry. 

811 

812 Parameters 

813 ---------- 

814 id : `DatasetId` 

815 The unique identifier for the dataset. 

816 

817 Returns 

818 ------- 

819 ref : `DatasetRef` or `None` 

820 A ref to the Dataset, or `None` if no matching Dataset 

821 was found. 

822 """ 

823 raise NotImplementedError() 

824 

825 @abstractmethod 

826 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

827 """Remove datasets from the Registry. 

828 

829 The datasets will be removed unconditionally from all collections, and 

830 any `Quantum` that consumed this dataset will instead be marked with 

831 having a NULL input. `Datastore` records will *not* be deleted; the 

832 caller is responsible for ensuring that the dataset has already been 

833 removed from all Datastores. 

834 

835 Parameters 

836 ---------- 

837 refs : `Iterable` of `DatasetRef` 

838 References to the datasets to be removed. Must include a valid 

839 ``id`` attribute, and should be considered invalidated upon return. 

840 

841 Raises 

842 ------ 

843 AmbiguousDatasetError 

844 Raised if any ``ref.id`` is `None`. 

845 OrphanedRecordError 

846 Raised if any dataset is still present in any `Datastore`. 

847 """ 

848 raise NotImplementedError() 

849 

850 @abstractmethod 

851 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

852 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

853 

854 If a `DatasetRef` with the exact same ID is already in the collection, 

855 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

856 data ID but a different ID exists in the collection, 

857 `ConflictingDefinitionError` is raised. 

858 

859 Parameters 

860 ---------- 

861 collection : `str` 

862 Indicates the collection the datasets should be associated with. 

863 refs : `Iterable` [ `DatasetRef` ] 

864 An iterable of resolved `DatasetRef` instances that already exist 

865 in this `Registry`. 

866 

867 Raises 

868 ------ 

869 ConflictingDefinitionError 

870 If a Dataset with the given `DatasetRef` already exists in the 

871 given collection. 

872 AmbiguousDatasetError 

873 Raised if ``any(ref.id is None for ref in refs)``. 

874 MissingCollectionError 

875 Raised if ``collection`` does not exist in the registry. 

876 TypeError 

877 Raised if adding new datasets to the given ``collection`` is not 

878 allowed. 

879 """ 

880 raise NotImplementedError() 

881 

882 @abstractmethod 

883 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

884 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

885 

886 ``collection`` and ``ref`` combinations that are not currently 

887 associated are silently ignored. 

888 

889 Parameters 

890 ---------- 

891 collection : `str` 

892 The collection the datasets should no longer be associated with. 

893 refs : `Iterable` [ `DatasetRef` ] 

894 An iterable of resolved `DatasetRef` instances that already exist 

895 in this `Registry`. 

896 

897 Raises 

898 ------ 

899 AmbiguousDatasetError 

900 Raised if any of the given dataset references is unresolved. 

901 MissingCollectionError 

902 Raised if ``collection`` does not exist in the registry. 

903 TypeError 

904 Raised if removing datasets from the given ``collection`` is not 

905 allowed. 

906 """ 

907 raise NotImplementedError() 

908 

909 @abstractmethod 

910 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

911 """Associate one or more datasets with a calibration collection and a 

912 validity range within it. 

913 

914 Parameters 

915 ---------- 

916 collection : `str` 

917 The name of an already-registered `~CollectionType.CALIBRATION` 

918 collection. 

919 refs : `Iterable` [ `DatasetRef` ] 

920 Datasets to be associated. 

921 timespan : `Timespan` 

922 The validity range for these datasets within the collection. 

923 

924 Raises 

925 ------ 

926 AmbiguousDatasetError 

927 Raised if any of the given `DatasetRef` instances is unresolved. 

928 ConflictingDefinitionError 

929 Raised if the collection already contains a different dataset with 

930 the same `DatasetType` and data ID and an overlapping validity 

931 range. 

932 TypeError 

933 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

934 collection or if one or more datasets are of a dataset type for 

935 which `DatasetType.isCalibration` returns `False`. 

936 """ 

937 raise NotImplementedError() 

938 

939 @abstractmethod 

940 def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *, 

941 dataIds: Optional[Iterable[DataId]] = None) -> None: 

942 """Remove or adjust datasets to clear a validity range within a 

943 calibration collection. 

944 

945 Parameters 

946 ---------- 

947 collection : `str` 

948 The name of an already-registered `~CollectionType.CALIBRATION` 

949 collection. 

950 datasetType : `str` or `DatasetType` 

951 Name or `DatasetType` instance for the datasets to be decertified. 

952 timespan : `Timespan`, optional 

953 The validity range to remove datasets from within the collection. 

954 Datasets that overlap this range but are not contained by it will 

955 have their validity ranges adjusted to not overlap it, which may 

956 split a single dataset validity range into two. 

957 dataIds : `Iterable` [ `DataId` ], optional 

958 Data IDs that should be decertified within the given validity range. 

959 If `None`, all data IDs for the given ``datasetType`` will be 

960 decertified. 

961 

962 Raises 

963 ------ 

964 TypeError 

965 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

966 collection or if ``datasetType.isCalibration() is False``. 

967 """ 

968 raise NotImplementedError() 

969 
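# Illustrative sketch pairing certify/decertify. The collection name and
# validity range are hypothetical, and ``refs`` stands in for previously
# obtained resolved DatasetRef instances:
#
#     import astropy.time
#     from lsst.daf.butler import Timespan
#
#     span = Timespan(astropy.time.Time("2022-01-01", scale="tai"),
#                     astropy.time.Time("2022-07-01", scale="tai"))
#     registry.certify("HypoCam/calib/biases", refs, span)
#     # later, clear that range again for one dataset type:
#     registry.decertify("HypoCam/calib/biases", "bias", span)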

970 @abstractmethod 

971 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

972 """Return an object that allows a new `Datastore` instance to 

973 communicate with this `Registry`. 

974 

975 Returns 

976 ------- 

977 manager : `DatastoreRegistryBridgeManager` 

978 Object that mediates communication between this `Registry` and its 

979 associated datastores. 

980 """ 

981 raise NotImplementedError() 

982 

983 @abstractmethod 

984 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

985 """Retrieve datastore locations for a given dataset. 

986 

987 Parameters 

988 ---------- 

989 ref : `DatasetRef` 

990 A reference to the dataset for which to retrieve storage 

991 information. 

992 

993 Returns 

994 ------- 

995 datastores : `Iterable` [ `str` ] 

996 All the matching datastores holding this dataset. 

997 

998 Raises 

999 ------ 

1000 AmbiguousDatasetError 

1001 Raised if ``ref.id`` is `None`. 

1002 """ 

1003 raise NotImplementedError() 

1004 

1005 @abstractmethod 

1006 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

1007 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

1008 withDefaults: bool = True, 

1009 **kwargs: Any) -> DataCoordinate: 

1010 """Expand a dimension-based data ID to include additional information. 

1011 

1012 Parameters 

1013 ---------- 

1014 dataId : `DataCoordinate` or `dict`, optional 

1015 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1016 graph : `DimensionGraph`, optional 

1017 Set of dimensions for the expanded ID. If `None`, the dimensions 

1018 will be inferred from the keys of ``dataId`` and ``kwargs``. 

1019 Dimensions that are in ``dataId`` or ``kwargs`` but not in 

1020 ``graph`` are silently ignored, providing a way to extract and 

1021 expand a subset of a data ID. 

1022 records : `Mapping` [`str`, `DimensionRecord`], optional 

1023 Dimension record data to use before querying the database for that 

1024 data, keyed by element name. 

1025 withDefaults : `bool`, optional 

1026 Utilize ``self.defaults.dataId`` to fill in missing governor 

1027 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1028 used). 

1029 **kwargs 

1030 Additional keywords are treated like additional key-value pairs for 

1031 ``dataId``, extending and overriding it. 

1032 

1033 Returns 

1034 ------- 

1035 expanded : `DataCoordinate` 

1036 A data ID that includes full metadata for all of the dimensions it 

1037 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1038 ``expanded.hasFull()`` both return `True`. 

1039 """ 

1040 raise NotImplementedError() 

1041 
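# Illustrative sketch (hypothetical instrument/exposure values):
#
#     data_id = registry.expandDataId(instrument="HypoCam", exposure=101)
#     assert data_id.hasFull() and data_id.hasRecords()
#     # dimension records are now attached, e.g. data_id.records["exposure"]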

1042 @abstractmethod 

1043 def insertDimensionData(self, element: Union[DimensionElement, str], 

1044 *data: Union[Mapping[str, Any], DimensionRecord], 

1045 conform: bool = True, 

1046 replace: bool = False) -> None: 

1047 """Insert one or more dimension records into the database. 

1048 

1049 Parameters 

1050 ---------- 

1051 element : `DimensionElement` or `str` 

1052 The `DimensionElement` or name thereof that identifies the table 

1053 records will be inserted into. 

1054 data : `dict` or `DimensionRecord` (variadic) 

1055 One or more records to insert. 

1056 conform : `bool`, optional 

1057 If `False` (`True` is default) perform no checking or conversions, 

1058 and assume that ``element`` is a `DimensionElement` instance and 

1059 ``data`` contains one or more `DimensionRecord` instances of the 

1060 appropriate subclass. 

1061 replace : `bool`, optional 

1062 If `True` (`False` is default), replace existing records in the 

1063 database if there is a conflict. 

1064 """ 

1065 raise NotImplementedError() 

1066 

1067 @abstractmethod 

1068 def syncDimensionData(self, element: Union[DimensionElement, str], 

1069 row: Union[Mapping[str, Any], DimensionRecord], 

1070 conform: bool = True, 

1071 update: bool = False) -> Union[bool, Dict[str, Any]]: 

1072 """Synchronize the given dimension record with the database, inserting 

1073 if it does not already exist and comparing values if it does. 

1074 

1075 Parameters 

1076 ---------- 

1077 element : `DimensionElement` or `str` 

1078 The `DimensionElement` or name thereof that identifies the table 

1079 records will be inserted into. 

1080 row : `dict` or `DimensionRecord` 

1081 The record to insert. 

1082 conform : `bool`, optional 

1083 If `False` (`True` is default) perform no checking or conversions, 

1084 and assume that ``element`` is a `DimensionElement` instance and 

1085 ``row`` is a `DimensionRecord` instance of the 

1086 appropriate subclass. 

1087 update : `bool`, optional 

1088 If `True` (`False` is default), update the existing record in the 

1089 database if there is a conflict. 

1090 

1091 Returns 

1092 ------- 

1093 inserted_or_updated : `bool` or `dict` 

1094 `True` if a new row was inserted, `False` if no changes were 

1095 needed, or a `dict` mapping updated column names to their old 

1096 values if an update was performed (only possible if 

1097 ``update=True``). 

1098 

1099 Raises 

1100 ------ 

1101 ConflictingDefinitionError 

1102 Raised if the record exists in the database (according to primary 

1103 key lookup) but is inconsistent with the given one. 

1104 """ 

1105 raise NotImplementedError() 

1106 
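# Illustrative sketch; the record fields required for the "instrument"
# element depend on the repository's dimension configuration, so the
# payload here is schematic:
#
#     result = registry.syncDimensionData(
#         "instrument",
#         {"name": "HypoCam", "detector_max": 16},
#     )
#     # True -> inserted; False -> already present and consistent;
#     # dict  -> updated columns mapped to old values (update=True only)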

1107 @abstractmethod 

1108 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

1109 ) -> Iterator[DatasetType]: 

1110 """Iterate over the dataset types whose names match an expression. 

1111 

1112 Parameters 

1113 ---------- 

1114 expression : `Any`, optional 

1115 An expression that fully or partially identifies the dataset types 

1116 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1117 `...` can be used to return all dataset types, and is the default. 

1118 See :ref:`daf_butler_dataset_type_expressions` for more 

1119 information. 

1120 components : `bool`, optional 

1121 If `True`, apply all expression patterns to component dataset type 

1122 names as well. If `False`, never apply patterns to components. 

1123 If `None` (default), apply patterns to components only if their 

1124 parent datasets were not matched by the expression. 

1125 Fully-specified component datasets (`str` or `DatasetType` 

1126 instances) are always included. 

1127 

1128 Yields 

1129 ------ 

1130 datasetType : `DatasetType` 

1131 A `DatasetType` instance whose name matches ``expression``. 

1132 """ 

1133 raise NotImplementedError() 

1134 

1135 @abstractmethod 

1136 def queryCollections(self, expression: Any = ..., 

1137 datasetType: Optional[DatasetType] = None, 

1138 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1139 flattenChains: bool = False, 

1140 includeChains: Optional[bool] = None) -> Iterator[str]: 

1141 """Iterate over the collections whose names match an expression. 

1142 

1143 Parameters 

1144 ---------- 

1145 expression : `Any`, optional 

1146 An expression that identifies the collections to return, such as 

1147 a `str` (for full matches or partial matches via globs), 

1148 `re.Pattern` (for partial matches), or iterable thereof. `...` 

1149 can be used to return all collections, and is the default. 

1150 See :ref:`daf_butler_collection_expressions` for more information. 

1151 datasetType : `DatasetType`, optional 

1152 If provided, only yield collections that may contain datasets of 

1153 this type. This is a conservative approximation in general; it may 

1154 yield collections that do not have any such datasets. 

1155 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1156 If provided, only yield collections of these types. 

1157 flattenChains : `bool`, optional 

1158 If `True` (`False` is default), recursively yield the child 

1159 collections of matching `~CollectionType.CHAINED` collections. 

1160 includeChains : `bool`, optional 

1161 If `True`, yield records for matching `~CollectionType.CHAINED` 

1162 collections. Default is the opposite of ``flattenChains``: include 

1163 either CHAINED collections or their children, but not both. 

1164 

1165 Yields 

1166 ------ 

1167 collection : `str` 

1168 The name of a collection that matches ``expression``. 

1169 """ 

1170 raise NotImplementedError() 

1171 

1172 @abstractmethod 

1173 def queryDatasets(self, datasetType: Any, *, 

1174 collections: Any = None, 

1175 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1176 dataId: Optional[DataId] = None, 

1177 where: Optional[str] = None, 

1178 findFirst: bool = False, 

1179 components: Optional[bool] = None, 

1180 bind: Optional[Mapping[str, Any]] = None, 

1181 check: bool = True, 

1182 **kwargs: Any) -> Iterable[DatasetRef]: 

1183 """Query for and iterate over dataset references matching user-provided 

1184 criteria. 

1185 

1186 Parameters 

1187 ---------- 

1188 datasetType 

1189 An expression that fully or partially identifies the dataset types 

1190 to be queried. Allowed types include `DatasetType`, `str`, 

1191 `re.Pattern`, and iterables thereof. The special value `...` can 

1192 be used to query all dataset types. See 

1193 :ref:`daf_butler_dataset_type_expressions` for more information. 

1194 collections : `Any`, optional 

1195 An expression that identifies the collections to search, such as a 

1196 `str` (for full matches or partial matches via globs), `re.Pattern` 

1197 (for partial matches), or iterable thereof. `...` can be used to 

1198 search all collections (actually just all `~CollectionType.RUN` 

1199 collections, because this will still find all datasets). 

1200 If not provided, ``self.defaults.collections`` is used. See 

1201 :ref:`daf_butler_collection_expressions` for more information. 

1202 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1203 Dimensions to include in the query (in addition to those used 

1204 to identify the queried dataset type(s)), either to constrain 

1205 the resulting datasets to those for which a matching dimension 

1206 exists, or to relate the dataset type's dimensions to dimensions 

1207 referenced by the ``dataId`` or ``where`` arguments. 

1208 dataId : `dict` or `DataCoordinate`, optional 

1209 A data ID whose key-value pairs are used as equality constraints 

1210 in the query. 

1211 where : `str`, optional 

1212 A string expression similar to a SQL WHERE clause. May involve 

1213 any column of a dimension table or (as a shortcut for the primary 

1214 key column of a dimension table) dimension name. See 

1215 :ref:`daf_butler_dimension_expressions` for more information. 

1216 findFirst : `bool`, optional 

1217 If `True` (`False` is default), for each result data ID, only 

1218 yield one `DatasetRef` of each `DatasetType`, from the first 

1219 collection in which a dataset of that dataset type appears 

1220 (according to the order of ``collections`` passed in). If `True`, 

1221 ``collections`` must not contain regular expressions and may not 

1222 be `...`. 

1223 components : `bool`, optional 

1224 If `True`, apply all dataset expression patterns to component 

1225 dataset type names as well. If `False`, never apply patterns to 

1226 components. If `None` (default), apply patterns to components only 

1227 if their parent datasets were not matched by the expression. 

1228 Fully-specified component datasets (`str` or `DatasetType` 

1229 instances) are always included. 

1230 bind : `Mapping`, optional 

1231 Mapping containing literal values that should be injected into the 

1232 ``where`` expression, keyed by the identifiers they replace. 

1233 check : `bool`, optional 

1234 If `True` (default) check the query for consistency before 

1235 executing it. This may reject some valid queries that resemble 

1236 common mistakes (e.g. queries for visits without specifying an 

1237 instrument). 

1238 **kwargs 

1239 Additional keyword arguments are forwarded to 

1240 `DataCoordinate.standardize` when processing the ``dataId`` 

1241 argument (and may be used to provide a constraining data ID even 

1242 when the ``dataId`` argument is `None`). 

1243 

1244 Returns 

1245 ------- 

1246 refs : `queries.DatasetQueryResults` 

1247 Dataset references matching the given query criteria. Nested data 

1248 IDs are guaranteed to include values for all implied dimensions 

1249 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1250 include dimension records (`DataCoordinate.hasRecords` will be 

1251 `False`) unless `~queries.DatasetQueryResults.expanded` is called 

1252 on the result object (which returns a new one). 

1253 

1254 Raises 

1255 ------ 

1256 TypeError 

1257 Raised when the arguments are incompatible, such as when a 

1258 collection wildcard is passed when ``findFirst`` is `True`, or 

1259 when ``collections`` is `None` and ``self.defaults.collections`` is 

1260 also `None`. 

1261 

1262 Notes 

1263 ----- 

1264 When multiple dataset types are queried in a single call, the 

1265 results of this operation are equivalent to querying for each dataset 

1266 type separately in turn, and no information about the relationships 

1267 between datasets of different types is included. In contexts where 

1268 that kind of information is important, the recommended pattern is to 

1269 use `queryDataIds` to first obtain data IDs (possibly with the 

1270 desired dataset types and collections passed as constraints to the 

1271 query), and then use multiple (generally much simpler) calls to 

1272 `queryDatasets` with the returned data IDs passed as constraints. 

1273 """ 

1274 raise NotImplementedError() 

1275 
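# Illustrative sketch with a ``where`` expression and ``bind`` parameters;
# the dataset type, collection, and dimension values are hypothetical:
#
#     refs = registry.queryDatasets(
#         "calexp",
#         collections=["HypoCam/runs/2022"],
#         where="instrument = 'HypoCam' AND visit > cutoff",
#         bind={"cutoff": 100},
#         findFirst=True,
#     )
#     for ref in refs:
#         ...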

1276 @abstractmethod 

1277 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1278 dataId: Optional[DataId] = None, 

1279 datasets: Any = None, 

1280 collections: Any = None, 

1281 where: Optional[str] = None, 

1282 components: Optional[bool] = None, 

1283 bind: Optional[Mapping[str, Any]] = None, 

1284 check: bool = True, 

1285 **kwargs: Any) -> DataCoordinateIterable: 

1286 """Query for data IDs matching user-provided criteria. 

1287 

1288 Parameters 

1289 ---------- 

1290 dimensions : `Dimension` or `str`, or iterable thereof 

1291 The dimensions of the data IDs to yield, as either `Dimension` 

1292 instances or `str`. Will be automatically expanded to a complete 

1293 `DimensionGraph`. 

1294 dataId : `dict` or `DataCoordinate`, optional 

1295 A data ID whose key-value pairs are used as equality constraints 

1296 in the query. 

1297 datasets : `Any`, optional 

1298 An expression that fully or partially identifies dataset types 

1299 that should constrain the yielded data IDs. For example, including 

1300 "raw" here would constrain the yielded ``instrument``, 

1301 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1302 those for which at least one "raw" dataset exists in 

1303 ``collections``. Allowed types include `DatasetType`, `str`, 

1304 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1305 expressions, ``...`` is not permitted - it doesn't make sense to 

1306 constrain data IDs on the existence of *all* datasets. 

1307 See :ref:`daf_butler_dataset_type_expressions` for more 

1308 information. 

1309 collections : `Any`, optional 

1310 An expression that identifies the collections to search for 

1311 datasets, such as a `str` (for full matches or partial matches 

1312 via globs), `re.Pattern` (for partial matches), or iterable 

1313 thereof. `...` can be used to search all collections (actually 

1314 just all `~CollectionType.RUN` collections, because this will 

1315 still find all datasets). If not provided, 

1316 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1317 is also passed. See :ref:`daf_butler_collection_expressions` for 

1318 more information. 

1319 where : `str`, optional 

1320 A string expression similar to a SQL WHERE clause. May involve 

1321 any column of a dimension table or (as a shortcut for the primary 

1322 key column of a dimension table) dimension name. See 

1323 :ref:`daf_butler_dimension_expressions` for more information. 

1324 components : `bool`, optional 

1325 If `True`, apply all dataset expression patterns to component 

1326 dataset type names as well. If `False`, never apply patterns to 

1327 components. If `None` (default), apply patterns to components only 

1328 if their parent datasets were not matched by the expression. 

1329 Fully-specified component datasets (`str` or `DatasetType` 

1330 instances) are always included. 

1331 bind : `Mapping`, optional 

1332 Mapping containing literal values that should be injected into the 

1333 ``where`` expression, keyed by the identifiers they replace. 

1334 check : `bool`, optional 

1335 If `True` (default) check the query for consistency before 

1336 executing it. This may reject some valid queries that resemble 

1337 common mistakes (e.g. queries for visits without specifying an 

1338 instrument). 

1339 **kwargs 

1340 Additional keyword arguments are forwarded to 

1341 `DataCoordinate.standardize` when processing the ``dataId`` 

1342 argument (and may be used to provide a constraining data ID even 

1343 when the ``dataId`` argument is `None`). 

1344 

1345 Returns 

1346 ------- 

1347 dataIds : `DataCoordinateQueryResults` 

1348 Data IDs matching the given query parameters. These are guaranteed 

1349 to identify all dimensions (`DataCoordinate.hasFull` returns 

1350 `True`), but will not contain `DimensionRecord` objects 

1351 (`DataCoordinate.hasRecords` returns `False`). Call 

1352 `DataCoordinateQueryResults.expanded` on the returned object to 

1353 fetch those (and consider using 

1354 `DataCoordinateQueryResults.materialize` on the returned object 

1355 first if the expected number of rows is very large). See 

1356 documentation for those methods for additional information. 

1357 

1358 Raises 

1359 ------ 

1360 TypeError 

1361 Raised if ``collections`` is `None`, ``self.defaults.collections`` 

1362 is `None`, and ``datasets`` is not `None`. 

1363 """ 

1364 raise NotImplementedError() 

1365 
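# Illustrative sketch constraining data IDs on dataset existence
# (hypothetical names):
#
#     data_ids = registry.queryDataIds(
#         ["visit", "detector"],
#         datasets="raw",
#         collections="HypoCam/raw/run1",
#         instrument="HypoCam",
#     )
#     for data_id in data_ids.expanded():   # attach dimension records
#         ...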

1366 @abstractmethod 

1367 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1368 dataId: Optional[DataId] = None, 

1369 datasets: Any = None, 

1370 collections: Any = None, 

1371 where: Optional[str] = None, 

1372 components: Optional[bool] = None, 

1373 bind: Optional[Mapping[str, Any]] = None, 

1374 check: bool = True, 

1375 **kwargs: Any) -> Iterator[DimensionRecord]: 

1376 """Query for dimension information matching user-provided criteria. 

1377 

1378 Parameters 

1379 ---------- 

1380 element : `DimensionElement` or `str` 

1381 The dimension element to obtain records for. 

1382 dataId : `dict` or `DataCoordinate`, optional 

1383 A data ID whose key-value pairs are used as equality constraints 

1384 in the query. 

1385 datasets : `Any`, optional 

1386 An expression that fully or partially identifies dataset types 

1387 that should constrain the yielded records. See `queryDataIds` and 

1388 :ref:`daf_butler_dataset_type_expressions` for more information. 

1389 collections : `Any`, optional 

1390 An expression that identifies the collections to search for 

1391 datasets, such as a `str` (for full matches or partial matches 

1392 via globs), `re.Pattern` (for partial matches), or iterable 

1393 thereof. `...` can be used to search all collections (actually 

1394 just all `~CollectionType.RUN` collections, because this will 

1395 still find all datasets). If not provided, 

1396 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1397 is also passed. See :ref:`daf_butler_collection_expressions` for 

1398 more information. 

1399 where : `str`, optional 

1400 A string expression similar to a SQL WHERE clause. See 

1401 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1402 information. 

1403 components : `bool`, optional 

1404 Whether to apply dataset expressions to components as well. 

1405 See `queryDataIds` for more information. 

1406 bind : `Mapping`, optional 

1407 Mapping containing literal values that should be injected into the 

1408 ``where`` expression, keyed by the identifiers they replace. 

1409 check : `bool`, optional 

1410 If `True` (default) check the query for consistency before 

1411 executing it. This may reject some valid queries that resemble 

1412 common mistakes (e.g. queries for visits without specifying an 

1413 instrument). 

1414 **kwargs 

1415 Additional keyword arguments are forwarded to 

1416 `DataCoordinate.standardize` when processing the ``dataId`` 

1417 argument (and may be used to provide a constraining data ID even 

1418 when the ``dataId`` argument is `None`). 

1419 

1420 Returns 

1421 ------- 

1422 records : `Iterator` [ `DimensionRecord` ] 

1423 Dimension records matching the given query parameters. 

1424 """ 

1425 raise NotImplementedError() 

1426 
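# Illustrative sketch (hypothetical instrument; the attributes available
# on each record depend on the dimension element's schema):
#
#     for record in registry.queryDimensionRecords("detector",
#                                                  instrument="HypoCam"):
#         print(record.id, record.full_name)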

1427 @abstractmethod 

1428 def queryDatasetAssociations( 

1429 self, 

1430 datasetType: Union[str, DatasetType], 

1431 collections: Any = ..., 

1432 *, 

1433 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1434 flattenChains: bool = False, 

1435 ) -> Iterator[DatasetAssociation]: 

1436 """Iterate over dataset-collection combinations where the dataset is in 

1437 the collection. 

1438 

1439 This method is a temporary placeholder for better support for 

1440 association results in `queryDatasets`. It will probably be 

1441 removed in the future, and should be avoided in production code 

1442 whenever possible. 

1443 

1444 Parameters 

1445 ---------- 

1446 datasetType : `DatasetType` or `str` 

1447 A dataset type object or the name of one. 

1448 collections : `Any`, optional 

1449 An expression that identifies the collections to search for 

1450 datasets, such as a `str` (for full matches or partial matches 

1451 via globs), `re.Pattern` (for partial matches), or iterable 

1452 thereof. `...` can be used to search all collections (actually 

1453 just all `~CollectionType.RUN` collections, because this will still 

1454 find all datasets. If not provided, ``self.defaults.collections`` 

1455 is used. See :ref:`daf_butler_collection_expressions` for more 

1456 information. 

1457 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1458 If provided, only yield associations from collections of these 

1459 types. 

1460 flattenChains : `bool`, optional 

1461 If `True` (default) search in the children of 

1462 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED`` 

1463 collections are ignored. 

1464 

1465 Yields 

1466 ------ 

1467 association : `DatasetAssociation` 

1468 Object representing the relationship between a single dataset and 

1469 a single collection. 

1470 

1471 Raises 

1472 ------ 

1473 TypeError 

1474 Raised if ``collections`` is `None` and 

1475 ``self.defaults.collections`` is `None`. 

1476 """ 

1477 raise NotImplementedError() 

1478 

1479 storageClasses: StorageClassFactory 

1480 """All storage classes known to the registry (`StorageClassFactory`). 

1481 """