Coverage for python/lsst/daf/butler/registry/_registry.py: 61%

187 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdFactory, DatasetIdGenEnum
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config`, `str` or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.

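        Examples
        --------
        A minimal sketch, assuming ``registry.yaml`` describes an existing
        repository; the concrete subclass is chosen from the config's
        ``cls`` entry:

        >>> config = RegistryConfig("registry.yaml")
        >>> registry = Registry.fromConfig(config, writeable=False)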
        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but with independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for this registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.

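        Examples
        --------
        A minimal sketch, assuming the (hypothetical) collection name is not
        already in use:

        >>> registry.registerCollection("u/example/tagged", CollectionType.TAGGED)
        True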
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if a new run was registered, `False` if it already
            existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when
            the given chained collection is searched.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.

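        Examples
        --------
        A sketch that builds a two-element chain; the collection names are
        illustrative and the children must already exist:

        >>> registry.registerCollection("u/example/chain", CollectionType.CHAINED)
        True
        >>> registry.setCollectionChain(
        ...     "u/example/chain", ["u/example/run-a", "u/example/run-b"]
        ... )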
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a
            different definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.

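        Examples
        --------
        A minimal sketch; the dataset type name, dimensions, and storage
        class are illustrative and must be valid in this repository:

        >>> datasetType = DatasetType(
        ...     "example_metadata",
        ...     dimensions=["instrument", "visit"],
        ...     storageClass="StructuredDataDict",
        ...     universe=registry.dimensions,
        ... )
        >>> registry.registerDatasetType(datasetType)
        True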
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other butler processes that are
            active but have not seen the deletion.

        Parameters
        ----------
        name : `str` or `tuple` [ `str`, ... ]
            Name of the type to be removed, or a tuple of such names.
            Wildcards are allowed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though
        most other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset
        to be read from a `Datastore`. If the dataset is a component and can
        not be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one. If this is a `DatasetType`
            instance, its storage class will be respected and propagated to
            the output, even if it differs from the dataset type definition
            in the registry, as long as the storage classes are convertible.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        MissingDatasetTypeError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the
        behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and
        that adding additional collections that do not contain a match to
        the search path never changes the behavior.

        This method handles component dataset types automatically, though
        most other registry operations do not.

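        Examples
        --------
        A sketch of a typical lookup; the dataset type name, data ID values,
        and collection name are illustrative:

        >>> ref = registry.findDataset(
        ...     "raw",
        ...     instrument="HSC",
        ...     exposure=903334,
        ...     detector=10,
        ...     collections="HSC/raw/all",
        ... )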
        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs. By default
            unique IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

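        Examples
        --------
        A sketch that inserts two datasets into a hypothetical run; the
        dataset type and data ID values are illustrative:

        >>> refs = registry.insertDatasets(
        ...     "example_metadata",
        ...     dataIds=[
        ...         {"instrument": "HSC", "visit": 903334},
        ...         {"instrument": "HSC", "visit": 903336},
        ...     ],
        ...     run="u/example/run",
        ... )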
        """
        raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`) then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs when IDs are not
            provided or their type does not match the backend type. By
            default unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an
            exception will be raised if imported IDs clash with existing
            ones. This option has no effect on the use of globally-unique
            IDs, which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not
        use this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType`
        and data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.

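        Examples
        --------
        A sketch that certifies previously-resolved calibration dataset refs
        for a one-day validity range; the collection name and dates are
        illustrative:

        >>> import astropy.time
        >>> begin = astropy.time.Time("2023-01-01T00:00:00", scale="tai")
        >>> end = astropy.time.Time("2023-01-02T00:00:00", scale="tai")
        >>> registry.certify("HSC/calib", refs, Timespan(begin, end))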
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the
            collection. Datasets that overlap this range but are not
            contained by it will have their validity ranges adjusted to not
            overlap it, which may split a single dataset validity range into
            two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [ `str`, `DimensionRecord` ], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()``
            and ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.

        Notes
        -----
        This method cannot be relied upon to reject invalid data ID values
        for dimensions that do not actually have any record columns. For
        efficiency reasons the records for these dimensions (which have only
        dimension key values that are given by the caller) may be
        constructed directly rather than obtained from the registry
        database.

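        Examples
        --------
        A sketch expanding a visit-level data ID; the instrument and visit
        values are illustrative:

        >>> dataId = registry.expandDataId(instrument="HSC", visit=903334)
        >>> dataId.hasRecords() and dataId.hasFull()
        True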
        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` contains one or more `DimensionRecord`
            instances of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used
            when this is a concern.

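        Examples
        --------
        A sketch inserting a record for the ``instrument`` dimension; the
        field values are illustrative and the required fields depend on the
        dimension universe:

        >>> registry.insertDimensionData(
        ...     "instrument",
        ...     {
        ...         "name": "DummyCam",
        ...         "visit_max": 1024,
        ...         "exposure_max": 512,
        ...         "detector_max": 4,
        ...     },
        ... )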
        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to
            primary key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Returns
        -------
        dataset_types : `Iterable` [ `DatasetType` ]
            An `Iterable` of `DatasetType` instances whose names match
            ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Returns
        -------
        collections : `Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which their child lists appear is
        unspecified, and the lists of children may be incomplete if a child
        has multiple parents.

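        Examples
        --------
        A sketch listing all `~CollectionType.RUN` collections under a
        hypothetical user prefix:

        >>> for name in registry.queryCollections(
        ...     "u/example/*", collectionTypes=CollectionType.RUN
        ... ):
        ...     print(name)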
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.defaults.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each
        dataset type separately in turn, and no information about the
        relationships between datasets of different types is included. In
        contexts where that kind of information is important, the
        recommended pattern is to use `queryDataIds` to first obtain data
        IDs (possibly with the desired dataset types and collections passed
        as constraints to the query), and then use multiple (generally much
        simpler) calls to `queryDatasets` with the returned data IDs passed
        as constraints.

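        Examples
        --------
        A sketch that finds, for each matching data ID, the first ``raw``
        dataset in a hypothetical collection, constrained by a ``where``
        expression:

        >>> refs = registry.queryDatasets(
        ...     "raw",
        ...     collections="HSC/raw/all",
        ...     where="instrument = 'HSC' AND detector = 10",
        ...     findFirst=True,
        ... )
        >>> for ref in refs:
        ...     print(ref.dataId)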
1429 """ 

1430 raise NotImplementedError() 

1431 

1432 @abstractmethod 

1433 def queryDataIds( 

1434 self, 

1435 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1436 *, 

1437 dataId: Optional[DataId] = None, 

1438 datasets: Any = None, 

1439 collections: Any = None, 

1440 where: str = "", 

1441 components: Optional[bool] = None, 

1442 bind: Optional[Mapping[str, Any]] = None, 

1443 check: bool = True, 

1444 **kwargs: Any, 

1445 ) -> DataCoordinateQueryResults: 

1446 """Query for data IDs matching user-provided criteria. 

1447 

1448 Parameters 

1449 ---------- 

1450 dimensions : `Dimension` or `str`, or iterable thereof 

1451 The dimensions of the data IDs to yield, as either `Dimension` 

1452 instances or `str`. Will be automatically expanded to a complete 

1453 `DimensionGraph`. 

1454 dataId : `dict` or `DataCoordinate`, optional 

1455 A data ID whose key-value pairs are used as equality constraints 

1456 in the query. 

1457 datasets : `Any`, optional 

1458 An expression that fully or partially identifies dataset types 

1459 that should constrain the yielded data IDs. For example, including 

1460 "raw" here would constrain the yielded ``instrument``, 

1461 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1462 those for which at least one "raw" dataset exists in 

1463 ``collections``. Allowed types include `DatasetType`, `str`, 

1464 and iterables thereof. Regular expression objects (i.e. 

1465 `re.Pattern`) are deprecated and will be removed after the v26 

1466 release. See :ref:`daf_butler_dataset_type_expressions` for more 

1467 information. 

1468 collections: `Any`, optional 

1469 An expression that identifies the collections to search for 

1470 datasets, such as a `str` (for full matches or partial matches 

1471 via globs), `re.Pattern` (for partial matches), or iterable 

1472 thereof. ``...`` can be used to search all collections (actually 

1473 just all `~CollectionType.RUN` collections, because this will 

1474 still find all datasets). If not provided, 

1475 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1476 is also passed. See :ref:`daf_butler_collection_expressions` for 

1477 more information. 

1478 where : `str`, optional 

1479 A string expression similar to a SQL WHERE clause. May involve 

1480 any column of a dimension table or (as a shortcut for the primary 

1481 key column of a dimension table) dimension name. See 

1482 :ref:`daf_butler_dimension_expressions` for more information. 

1483 components : `bool`, optional 

1484 If `True`, apply all dataset expression patterns to component 

1485 dataset type names as well. If `False`, never apply patterns to 

1486 components. If `None` (default), apply patterns to components only 

1487 if their parent datasets were not matched by the expression. 

1488 Fully-specified component datasets (`str` or `DatasetType` 

1489 instances) are always included. 

1490 

1491 Values other than `False` are deprecated, and only `False` will be 

1492 supported after v26. After v27 this argument will be removed 

1493 entirely. 

1494 bind : `Mapping`, optional 

1495 Mapping containing literal values that should be injected into the 

1496 ``where`` expression, keyed by the identifiers they replace. 

1497 check : `bool`, optional 

1498 If `True` (default) check the query for consistency before 

1499 executing it. This may reject some valid queries that resemble 

1500 common mistakes (e.g. queries for visits without specifying an 

1501 instrument). 

1502 **kwargs 

1503 Additional keyword arguments are forwarded to 

1504 `DataCoordinate.standardize` when processing the ``dataId`` 

1505 argument (and may be used to provide a constraining data ID even 

1506 when the ``dataId`` argument is `None`). 

1507 

1508 Returns 

1509 ------- 

1510 dataIds : `queries.DataCoordinateQueryResults` 

1511 Data IDs matching the given query parameters. These are guaranteed 

1512 to identify all dimensions (`DataCoordinate.hasFull` returns 

1513 `True`), but will not contain `DimensionRecord` objects 

1514 (`DataCoordinate.hasRecords` returns `False`). Call 

1515 `DataCoordinateQueryResults.expanded` on the returned object to 

1516 fetch those (and consider calling 

1517 `DataCoordinateQueryResults.materialize` first if the expected 

1518 number of rows is very large). See 

1519 documentation for those methods for additional information. 

1520 

1521 Raises 

1522 ------ 

1523 NoDefaultCollectionError 

1524 Raised if ``collections`` is `None` and 

1525 ``self.defaults.collections`` is `None`. 

1526 CollectionExpressionError 

1527 Raised when ``collections`` expression is invalid. 

1528 DataIdError 

1529 Raised when ``dataId`` or keyword arguments specify unknown 

1530 dimensions or values, or when they contain inconsistent values. 

1531 DatasetTypeExpressionError 

1532 Raised when ``datasetType`` expression is invalid. 

1533 UserExpressionError 

1534 Raised when ``where`` expression is invalid. 
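
Examples
--------
A minimal sketch, assuming a configured ``registry`` whose dimension
universe includes ``instrument`` and ``detector``; the instrument
name ``HSC`` is illustrative::

    # Inject the literal value via ``bind`` rather than embedding
    # it in the ``where`` string.
    data_ids = registry.queryDataIds(
        ["detector"],
        where="instrument = my_instrument",
        bind={"my_instrument": "HSC"},
    )
    for data_id in data_ids:
        print(data_id["detector"])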

1535 """ 

1536 raise NotImplementedError() 

1537 

1538 @abstractmethod 

1539 def queryDimensionRecords( 

1540 self, 

1541 element: Union[DimensionElement, str], 

1542 *, 

1543 dataId: Optional[DataId] = None, 

1544 datasets: Any = None, 

1545 collections: Any = None, 

1546 where: str = "", 

1547 components: Optional[bool] = None, 

1548 bind: Optional[Mapping[str, Any]] = None, 

1549 check: bool = True, 

1550 **kwargs: Any, 

1551 ) -> DimensionRecordQueryResults: 

1552 """Query for dimension information matching user-provided criteria. 

1553 

1554 Parameters 

1555 ---------- 

1556 element : `DimensionElement` or `str` 

1557 The dimension element to obtain records for. 

1558 dataId : `dict` or `DataCoordinate`, optional 

1559 A data ID whose key-value pairs are used as equality constraints 

1560 in the query. 

1561 datasets : `Any`, optional 

1562 An expression that fully or partially identifies dataset types 

1563 that should constrain the yielded records. See `queryDataIds` and 

1564 :ref:`daf_butler_dataset_type_expressions` for more information. 

1565 collections : `Any`, optional 

1566 An expression that identifies the collections to search for 

1567 datasets, such as a `str` (for full matches or partial matches 

1568 via globs), `re.Pattern` (for partial matches), or iterable 

1569 thereof. ``...`` can be used to search all collections (actually 

1570 just all `~CollectionType.RUN` collections, because this will 

1571 still find all datasets). If not provided, 

1572 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1573 is also passed. See :ref:`daf_butler_collection_expressions` for 

1574 more information. 

1575 where : `str`, optional 

1576 A string expression similar to a SQL WHERE clause. See 

1577 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1578 information. 

1579 components : `bool`, optional 

1580 Whether to apply dataset expressions to components as well. 

1581 See `queryDataIds` for more information. 

1582 

1583 Values other than `False` are deprecated, and only `False` will be 

1584 supported after v26. After v27 this argument will be removed 

1585 entirely. 

1586 bind : `Mapping`, optional 

1587 Mapping containing literal values that should be injected into the 

1588 ``where`` expression, keyed by the identifiers they replace. 

1589 check : `bool`, optional 

1590 If `True` (default) check the query for consistency before 

1591 executing it. This may reject some valid queries that resemble 

1592 common mistakes (e.g. queries for visits without specifying an 

1593 instrument). 

1594 **kwargs 

1595 Additional keyword arguments are forwarded to 

1596 `DataCoordinate.standardize` when processing the ``dataId`` 

1597 argument (and may be used to provide a constraining data ID even 

1598 when the ``dataId`` argument is `None`). 

1599 

1600 Returns 

1601 ------- 

1602 records : `queries.DimensionRecordQueryResults` 

1603 Dimension records matching the given query parameters. 

1604 

1605 Raises 

1606 ------ 

1607 NoDefaultCollectionError 

1608 Raised if ``collections`` is `None` and 

1609 ``self.defaults.collections`` is `None`. 

1610 CollectionExpressionError 

1611 Raised when ``collections`` expression is invalid. 

1612 DataIdError 

1613 Raised when ``dataId`` or keyword arguments specify unknown 

1614 dimensions or values, or when they contain inconsistent values. 

1615 DatasetTypeExpressionError 

1616 Raised when the ``datasets`` expression is invalid. 

1617 UserExpressionError 

1618 Raised when ``where`` expression is invalid. 
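
Examples
--------
A minimal sketch, assuming a configured ``registry`` with a
dimension universe in which ``detector`` records carry a
``full_name`` field::

    for record in registry.queryDimensionRecords(
        "detector", where="instrument = 'HSC'"
    ):
        print(record.id, record.full_name)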

1619 """ 

1620 raise NotImplementedError() 

1621 

1622 @abstractmethod 

1623 def queryDatasetAssociations( 

1624 self, 

1625 datasetType: Union[str, DatasetType], 

1626 collections: Any = ..., 

1627 *, 

1628 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1629 flattenChains: bool = False, 

1630 ) -> Iterator[DatasetAssociation]: 

1631 """Iterate over dataset-collection combinations where the dataset is in 

1632 the collection. 

1633 

1634 This method is a temporary placeholder for better support for 

1635 association results in `queryDatasets`. It will probably be 

1636 removed in the future, and should be avoided in production code 

1637 whenever possible. 

1638 

1639 Parameters 

1640 ---------- 

1641 datasetType : `DatasetType` or `str` 

1642 A dataset type object or the name of one. 

1643 collections : `Any`, optional 

1644 An expression that identifies the collections to search for 

1645 datasets, such as a `str` (for full matches or partial matches 

1646 via globs), `re.Pattern` (for partial matches), or iterable 

1647 thereof. ``...`` can be used to search all collections (actually 

1648 just all `~CollectionType.RUN` collections, because this will still 

1649 find all datasets). If not provided, ``self.defaults.collections`` 

1650 is used. See :ref:`daf_butler_collection_expressions` for more 

1651 information. 

1652 collectionTypes : `Iterable` [ `CollectionType` ], optional 

1653 If provided, only yield associations from collections of these 

1654 types. 

1655 flattenChains : `bool`, optional 

1656 If `True`, search in the children of 

1657 `~CollectionType.CHAINED` collections; if `False` (default), 

1658 ``CHAINED`` collections are ignored. 

1659 

1660 Yields 

1661 ------ 

1662 association : `.DatasetAssociation` 

1663 Object representing the relationship between a single dataset and 

1664 a single collection. 

1665 

1666 Raises 

1667 ------ 

1668 NoDefaultCollectionError 

1669 Raised if ``collections`` is `None` and 

1670 ``self.defaults.collections`` is `None`. 

1671 CollectionExpressionError 

1672 Raised when ``collections`` expression is invalid. 
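
Examples
--------
A minimal sketch; the dataset type name ``calexp`` is illustrative::

    from lsst.daf.butler import CollectionType

    # Restrict the search to TAGGED collections; each yielded
    # association pairs one dataset with one collection.
    for assoc in registry.queryDatasetAssociations(
        "calexp",
        collectionTypes={CollectionType.TAGGED},
    ):
        print(assoc.ref, assoc.collection)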

1673 """ 

1674 raise NotImplementedError() 

1675 

1676 @property 

1677 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1678 """ObsCore manager instance for this registry (`ObsCoreTableManager` 

1679 or `None`). 

1680 

1681 An ObsCore manager may not be implemented for all registry backends, 

1682 and may not be enabled for every repository. 
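
A minimal guard sketch for callers::

    manager = registry.obsCoreTableManager
    if manager is None:
        # ObsCore support is not configured for this repository.
        ...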

1683 """ 

1684 return None 

1685 

1686 storageClasses: StorageClassFactory 

1687 """All storage classes known to the registry (`StorageClassFactory`). 
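
A minimal lookup sketch; the storage class name is illustrative::

    exposure_sc = registry.storageClasses.getStorageClass("ExposureF")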

1688 """ 

1689 

1690 datasetIdFactory: DatasetIdFactory 

1691 """Factory for dataset IDs."""