Coverage for python/lsst/daf/butler/registry/_registry.py: 61%

182 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdGenEnum
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .summaries import CollectionSummary
from .wildcards import CollectionSearch

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager

_LOG = logging.getLogger(__name__)

class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed to be shared between
    implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults
    are specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If `None`, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create registry database and return `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default), create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
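
    # A minimal usage sketch (not part of the API): constructing a registry
    # for an existing repository via the trampoline. The repository path,
    # collection, and run names below are hypothetical.
    #
    #     registry = Registry.fromConfig(
    #         "/path/to/repo/butler.yaml",
    #         writeable=True,
    #         defaults=RegistryDefaults(collections=["HSC/defaults"], run="u/alice/run1"),
    #     )
    #     assert registry.isWriteable()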

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but with independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()
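
    # A sketch of typical `transaction` usage: operations inside the ``with``
    # block are committed together and rolled back if an exception escapes.
    # ``registry`` is assumed to be a concrete instance; the dimension record
    # is illustrative only.
    #
    #     with registry.transaction(savepoint=True):
    #         registry.insertDimensionData("instrument", {"name": "DummyCam"})
    #         # Raising here would roll back the insert above.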

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for this registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after
        the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()
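
    # Illustrative sketch: registering collections of several types. The
    # collection names are hypothetical; ``registry`` is a concrete instance.
    #
    #     registry.registerCollection("u/alice/tagged", CollectionType.TAGGED)
    #     registry.registerCollection("calib/v1", CollectionType.CALIBRATION)
    #     registry.registerRun("u/alice/run1")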

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if a new run was registered, `False` if it already
            existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.exc.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it will be removed from the `Registry` database. This requires
        that those datasets be removed (or at least trashed) from any
        datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more
            information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
        """
        raise NotImplementedError()
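
    # Illustrative sketch: defining a chained collection that searches a run
    # and a calibration collection in order (the names are hypothetical).
    #
    #     registry.registerCollection("u/alice/chain", CollectionType.CHAINED)
    #     registry.setCollectionChain("u/alice/chain", ["u/alice/run1", "calib/v1"])
    #     print(list(registry.getCollectionChain("u/alice/chain")))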

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()
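
    # Illustrative sketch: defining and registering a dataset type. The
    # dataset type name, dimension names, and storage class are examples,
    # assuming they exist in the repository's dimension universe and storage
    # class configuration.
    #
    #     datasetType = DatasetType(
    #         "calexp",
    #         dimensions=["instrument", "visit", "detector"],
    #         storageClass="ExposureF",
    #         universe=registry.dimensions,
    #     )
    #     registry.registerDatasetType(datasetType)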

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other butler processes that are
            active and have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested dataset type could not be found in the
            registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        KeyError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched. This
        may make it harder to debug some lookup failures, but the behavior is
        intentional; we consider it more important that failed searches are
        reported consistently, regardless of the reason, and that adding
        additional collections that do not contain a match to the search path
        never changes the behavior.
        """
        raise NotImplementedError()
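
    # Illustrative sketch: looking up a single dataset by data ID. The
    # dataset type, data ID values, and collection name are hypothetical.
    #
    #     ref = registry.findDataset(
    #         "calexp",
    #         instrument="HSC",
    #         visit=903334,
    #         detector=16,
    #         collections="HSC/runs/example",
    #     )
    #     if ref is None:
    #         print("no matching dataset")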

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow the datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs. By default
            unique IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
        """
        raise NotImplementedError()
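
    # Illustrative sketch: inserting two new datasets into a RUN collection.
    # The dataset type, data ID values, and run name are hypothetical and
    # must already be known to the registry.
    #
    #     refs = registry.insertDatasets(
    #         "calexp",
    #         dataIds=[
    #             {"instrument": "HSC", "visit": 903334, "detector": 16},
    #             {"instrument": "HSC", "visit": 903334, "detector": 17},
    #         ],
    #         run="u/alice/run1",
    #     )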

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        This method differs from `insertDatasets` in that it accepts
        `DatasetRef` instances which should already be resolved and have a
        dataset ID. If the registry supports globally-unique dataset IDs
        (e.g. `uuid.UUID`), then datasets which already exist in the registry
        will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend, the IDs will be ignored and new
            IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow the datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs when IDs are not
            provided or their type does not match the backend type. By
            default unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True`, force re-use of imported dataset IDs for integer IDs
            which are normally generated as auto-incremented; an exception
            will be raised if imported IDs clash with existing ones. This
            option has no effect on the use of globally-unique IDs, which are
            always re-used (or generated if integer IDs are being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if the datasets correspond to more than one dataset type,
            or if the dataset type is not known to the registry.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not use
        this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a `DatasetRef` with the exact same ID is already in a collection,
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with a different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists in
            the given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
        """
        raise NotImplementedError()
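
    # Illustrative sketch: certifying calibration datasets into a
    # calibration collection for a validity range. ``refs`` is assumed to be
    # an iterable of resolved `DatasetRef` instances; the collection name and
    # timestamps are examples.
    #
    #     from astropy.time import Time
    #
    #     timespan = Timespan(Time("2021-01-01", scale="tai"),
    #                         Time("2021-07-01", scale="tai"))
    #     registry.certify("calib/v1", refs, timespan)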

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.
        """
        raise NotImplementedError()
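
    # Illustrative sketch: expanding a minimal data ID so that it carries
    # full dimension records (the instrument and visit values are
    # hypothetical).
    #
    #     dataId = registry.expandDataId(instrument="HSC", visit=903334)
    #     assert dataId.hasRecords() and dataId.hasFull()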

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default), perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` is one or more `DimensionRecord` instances
            of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        """
        raise NotImplementedError()
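
    # Illustrative sketch: inserting a dimension record as a plain mapping.
    # The field values are hypothetical; the required fields depend on the
    # repository's dimension universe configuration.
    #
    #     registry.insertDimensionData(
    #         "detector",
    #         {"instrument": "DummyCam", "id": 0, "full_name": "det0"},
    #     )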

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting it if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default), perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()
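
    # Illustrative sketch: iterating over dataset types whose names match a
    # regular expression (the pattern and names are hypothetical).
    #
    #     import re
    #
    #     for datasetType in registry.queryDatasetTypes(re.compile(r"calexp.*")):
    #         print(datasetType.name)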

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Collection names are sorted alphabetically.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.defaults.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`, \
                optional
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
        """
        raise NotImplementedError()
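
    # Illustrative sketch: a find-first query for datasets constrained by a
    # ``where`` expression with a bound literal (the names and values are
    # hypothetical).
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["u/alice/run1", "HSC/defaults"],
    #         where="instrument = 'HSC' AND visit = v",
    #         bind={"v": 903334},
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId)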

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, ``...`` is not permitted, as it does not make sense
            to constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions (`DataCoordinate.hasFull`
            returns `True`), but will not contain `DimensionRecord` objects
            (`DataCoordinate.hasRecords` returns `False`). Call
            `DataCoordinateQueryResults.expanded` on the returned object to
            fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasets`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
        """
        raise NotImplementedError()
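
    # Illustrative sketch: querying for data IDs constrained by the existence
    # of a dataset (the dataset type and collection names are hypothetical).
    #
    #     dataIds = registry.queryDataIds(
    #         ["exposure", "detector"],
    #         datasets="raw",
    #         collections="HSC/raw/all",
    #     )
    #     for dataId in dataIds.expanded():
    #         print(dataId["exposure"], dataId["detector"])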

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds`
            and :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `DimensionRecordQueryResults`
            Dimension records matching the given query parameters.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasets`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `DatasetAssociation`
            Object representing the relationship between a single dataset
            and a single collection.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`)."""