Coverage for python/lsst/daf/butler/registry/_registry.py: 62%

185 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdFactory, DatasetIdGenEnum
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .summaries import CollectionSummary
from .wildcards import CollectionSearch

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed to be shared between
    implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults
    are specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it cannot create a registry.
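
        Examples
        --------
        A minimal sketch of creating a new repository database; the
        configuration path here is hypothetical::

            config = RegistryConfig("/path/to/new/repo/registry.yaml")  # hypothetical path
            registry = Registry.createFromConfig(config)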

        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
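
        Examples
        --------
        A minimal sketch of connecting to an existing repository; the
        configuration path is hypothetical::

            config = RegistryConfig("/path/to/repo/registry.yaml")  # hypothetical path
            registry = Registry.fromConfig(config, writeable=False)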

        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but with independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
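
        Examples
        --------
        A minimal sketch of replacing the defaults wholesale; the collection
        and run names are hypothetical::

            registry.defaults = RegistryDefaults(
                collections=["HSC/defaults"],  # hypothetical collection
                run="u/someone/processing",  # hypothetical run
            )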

        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
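
        Examples
        --------
        A minimal sketch; the collection name is hypothetical::

            created = registry.registerCollection(
                "u/someone/tagged", CollectionType.TAGGED  # hypothetical name
            )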

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more
            information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
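
        Examples
        --------
        A minimal sketch that defines a two-element chain; all collection
        names are hypothetical::

            registry.registerCollection("defaults", CollectionType.CHAINED)
            registry.setCollectionChain("defaults", ["calib", "raw/all"])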

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other active butler processes that
            have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested dataset type could not be found in the
            registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and cannot
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        KeyError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the
        behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and
        that adding additional collections that do not contain a match to
        the search path never changes the behavior.
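
        Examples
        --------
        A minimal sketch; the dataset type, data ID values, and collection
        name are all hypothetical::

            ref = registry.findDataset(
                "raw",
                instrument="HSC",  # hypothetical data ID values
                exposure=903334,
                detector=50,
                collections="HSC/raw/all",  # hypothetical collection
            )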

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs. By default
            unique IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as
            one of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
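
        Examples
        --------
        A minimal sketch; the dataset type, data ID, and run name are all
        hypothetical::

            (ref,) = registry.insertDatasets(
                "raw",
                [{"instrument": "HSC", "exposure": 903334, "detector": 50}],
                run="HSC/raw/all",  # hypothetical run
            )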

        """
        raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`) then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs when IDs are not
            provided or their type does not match the backend type. By
            default unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an
            exception will be raised if imported IDs clash with existing
            ones. This option has no effect on the use of globally-unique
            IDs, which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if the datasets correspond to more than one dataset type
            or the dataset type is not known to the registry.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as
            one of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not
        use this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists
            in the given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it
            will have their validity ranges adjusted to not overlap it,
            which may split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.
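
        Examples
        --------
        A minimal sketch; the dimension values are hypothetical::

            dataId = registry.expandDataId(instrument="HSC", detector=50)
            assert dataId.hasFull() and dataId.hasRecords()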

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` contains one or more `DimensionRecord`
            instances of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used
            when this is a concern.
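
        Examples
        --------
        A minimal sketch; the record values are hypothetical, and a real
        ``instrument`` record may require additional fields::

            registry.insertDimensionData(
                "instrument",
                {"name": "DummyCam"},  # hypothetical record
            )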

        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            the record will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to
            primary key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which their child lists appear is
        unspecified, and the lists of children may be incomplete if a child
        has multiple parents.
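
        Examples
        --------
        A minimal sketch using a glob pattern; the pattern is hypothetical::

            for name in registry.queryCollections("HSC/runs/*"):
                print(name)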

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.default.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each
        dataset type separately in turn, and no information about the
        relationships between datasets of different types is included. In
        contexts where that kind of information is important, the
        recommended pattern is to use `queryDataIds` to first obtain data
        IDs (possibly with the desired dataset types and collections passed
        as constraints to the query), and then use multiple (generally much
        simpler) calls to `queryDatasets` with the returned data IDs passed
        as constraints.
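
        Examples
        --------
        A minimal sketch; the dataset type, collection, and ``where``
        values are all hypothetical::

            refs = registry.queryDatasets(
                "calexp",  # hypothetical dataset type
                collections="HSC/runs/RC2",  # hypothetical collection
                where="instrument = 'HSC' AND visit = 903334",
            )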

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, ``...`` is not permitted - it doesn't make sense to
            constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions
            (`DataCoordinate.hasFull` returns `True`), but will not contain
            `DimensionRecord` objects (`DataCoordinate.hasRecords` returns
            `False`). Call `DataCoordinateQueryResults.expanded` on the
            returned object to fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasets`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
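
        Examples
        --------
        A minimal sketch constraining data IDs by dataset existence; the
        dataset type and collection name are hypothetical::

            dataIds = registry.queryDataIds(
                ["exposure", "detector"],
                datasets="raw",  # hypothetical dataset type
                collections="HSC/raw/all",  # hypothetical collection
            )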

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds`
            and :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `DimensionRecordQueryResults`
            Dimension records matching the given query parameters.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasets`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
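
        Examples
        --------
        A minimal sketch; the ``where`` expression is hypothetical::

            for record in registry.queryDimensionRecords(
                "detector", where="instrument = 'HSC'"
            ):
                print(record)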

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `DatasetAssociation`
            Object representing the relationship between a single dataset
            and a single collection.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """

    datasetIdFactory: DatasetIdFactory
    """Factory for dataset IDs."""