Coverage for python/lsst/daf/butler/registry/_registry.py: 78%

147 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
import re
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType
from lsst.utils.ellipsis import Ellipsis, EllipsisType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)

# Type alias for `collections` arguments.
CollectionArgType = str | re.Pattern | Iterable[str | re.Pattern] | EllipsisType | CollectionWildcard


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults
    are specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config` or `str` or `None`
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
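
        Examples
        --------
        A minimal sketch; the configuration filename is illustrative:

        >>> cfg = Registry.forceRegistryConfig("registry.yaml")
        >>> isinstance(cfg, RegistryConfig)
        True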

        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create registry database and return `Registry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create `Registry` subclass instance from ``config``.

        Registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
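
        Examples
        --------
        A sketch of connecting to an existing repository read-only; the
        configuration path is illustrative:

        >>> registry = Registry.fromConfig("/repo/butler.yaml", writeable=False)
        >>> registry.isWriteable()
        False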

        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and `False`
        otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
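
        Examples
        --------
        A sketch of replacing the defaults wholesale; the collection names
        are illustrative:

        >>> registry.defaults = RegistryDefaults(
        ...     collections=["HSC/defaults"], run="u/someone/scratch"
        ... )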

        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction.
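
        Examples
        --------
        A sketch of grouping writes so they commit or roll back together;
        ``records`` is a placeholder for prepared `DimensionRecord` data:

        >>> with registry.transaction(savepoint=True):
        ...     registry.insertDimensionData("detector", *records)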

        """
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry, if relevant.

        This operation can be used to reset connections to servers when using
        the registry with fork-based multiprocessing. This method should
        usually be called by the child process immediately after the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
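
        Examples
        --------
        A sketch of registering a tagged collection; the name and docstring
        are illustrative:

        >>> registry.registerCollection(
        ...     "u/someone/tagged", CollectionType.TAGGED, doc="Hand-picked refs."
        ... )
        True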

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it will be removed from the `Registry` database. This requires that
        those datasets be removed (or at least trashed) from any datastores
        that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when the
            given chained collection is searched.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
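
        Examples
        --------
        A sketch of (re)defining a chain; the collection names are
        illustrative, and the children must already exist:

        >>> registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
        True
        >>> registry.setCollectionChain("HSC/defaults", ["HSC/raw/all", "HSC/calib"])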

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace any
            existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """
        Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result in
            unexpected behavior from other butler processes that are active
            and have not seen the deletion.

        Parameters
        ----------
        name : `str` or `tuple` [`str`, ...]
            Name of the type to be removed or tuple containing a list of type
            names to be removed. Wildcards are allowed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type definition
            when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though most
        other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the parent
        will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one. If this is a `DatasetType`
            instance, its storage class will be respected and propagated to
            the output, even if it differs from the dataset type definition
            in the registry, as long as the storage classes are convertible.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        MissingDatasetTypeError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception even
        when the set of collections searched is intrinsically incompatible
        with the dataset type, e.g. if ``datasetType.isCalibration() is
        False``, but only `~CollectionType.CALIBRATION` collections are being
        searched. This may make it harder to debug some lookup failures, but
        the behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and that
        adding additional collections that do not contain a match to the
        search path never changes the behavior.

        This method handles component dataset types automatically, though most
        other registry operations do not.
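
        Examples
        --------
        A sketch of a single-dataset lookup; the dataset type name, dimension
        values, and collection are illustrative:

        >>> ref = registry.findDataset(
        ...     "raw",
        ...     instrument="HSC",
        ...     exposure=903334,
        ...     detector=16,
        ...     collections="HSC/raw/all",
        ... )
        >>> ref is None  # `None` would mean no match in the collections
        False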

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This is
            necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee this
            is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique IDs
            are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
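
        Examples
        --------
        A sketch of registering two datasets in a run; all names and values
        are illustrative:

        >>> registry.registerRun("HSC/raw/all")
        True
        >>> refs = registry.insertDatasets(
        ...     "raw",
        ...     dataIds=[
        ...         {"instrument": "HSC", "exposure": 903334, "detector": 16},
        ...         {"instrument": "HSC", "exposure": 903334, "detector": 17},
        ...     ],
        ...     run="HSC/raw/all",
        ... )
        >>> len(refs)
        2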

        """
        raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`) then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and new
            IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This is
            necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee this
            is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs when IDs are not
            provided or their type does not match backend type. By default
            unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an exception
            will be raised if imported IDs clash with existing ones. This
            option has no effect on the use of globally-unique IDs which are
            always re-used (or generated if integer IDs are being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already exists
            in the database then it will not be inserted or updated, but a
            resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if the datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not use
        this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections, and
        any `Quantum` that consumed this dataset will instead be marked with
        having a NULL input. `Datastore` records will *not* be deleted; the
        caller is responsible for ensuring that the dataset has already been
        removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists in
            the given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED` collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
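
        Examples
        --------
        A sketch of certifying a previously registered calibration dataset;
        ``flat_ref``, the collection name, and the ``begin``/``end``
        endpoints (`astropy.time.Time` values) are placeholders:

        >>> registry.certify("HSC/calib", [flat_ref], Timespan(begin, end))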

        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [ `str`, `DimensionRecord` ], optional
            Dimension record data to use before querying the database for that
            data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions it
            identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.

        Notes
        -----
        This method cannot be relied upon to reject invalid data ID values
        for dimensions that do not actually have any record columns. For
        efficiency reasons the records for these dimensions (which have only
        dimension key values that are given by the caller) may be constructed
        directly rather than obtained from the registry database.
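
        Examples
        --------
        A sketch of expanding a plain key-value data ID; the dimension values
        are illustrative:

        >>> dataId = registry.expandDataId(instrument="HSC", detector=50)
        >>> dataId.hasRecords() and dataId.hasFull()
        True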

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``data`` is one or more `DimensionRecord` instances of the
            appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used when
            this is a concern.
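
        Examples
        --------
        A sketch of inserting a record as a plain mapping; the field values
        are illustrative (fields follow the element's schema):

        >>> registry.insertDimensionData(
        ...     "detector", {"instrument": "HSC", "id": 50, "full_name": "1_36"}
        ... )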

        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database, inserting
        if it does not already exist and comparing values if it does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``row`` is a `DimensionRecord` instance of the appropriate
            subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
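
        Examples
        --------
        A sketch; syncing the same record twice returns `False` the second
        time because nothing changed (the record fields are illustrative):

        >>> registry.syncDimensionData(
        ...     "instrument", {"name": "DummyCam", "detector_max": 2}
        ... )
        True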

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset types
            to return, such as a `str`, `re.Pattern`, or iterable thereof.
            ``...`` can be used to return all dataset types, and is the
            default. See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Returns
        -------
        dataset_types : `Iterable` [ `DatasetType` ]
            An `Iterable` of `DatasetType` instances whose names match
            ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
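
        Examples
        --------
        A sketch of matching names against a pattern; the dataset type names
        are illustrative:

        >>> import re
        >>> for dataset_type in registry.queryDatasetTypes(re.compile("^calexp")):
        ...     print(dataset_type.name)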

        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it may
            yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``: include
            either CHAINED collections or their children, but not both.

        Returns
        -------
        collections : `Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which the two lists appear is unspecified, and
        the lists of children may be incomplete if a child has multiple
        parents.
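
        Examples
        --------
        A sketch of listing `~CollectionType.RUN` collections matching a
        glob; the pattern is illustrative:

        >>> for name in registry.queryCollections(
        ...     "HSC/runs/*", collectionTypes=CollectionType.RUN
        ... ):
        ...     print(name)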

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset types
            to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...`` can
            be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : optional
            An expression that identifies the collections to search, such as a
            `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.default.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested data
            IDs are guaranteed to include values for all implied dimensions
            (i.e. `DataCoordinate.hasFull` will return `True`), but will not
            include dimension records (`DataCoordinate.hasRecords` will be
            `False`) unless `~queries.DatasetQueryResults.expanded` is called
            on the result object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
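
        Examples
        --------
        A sketch of a find-first query; the dataset type, collection, and
        ``where`` expression are illustrative:

        >>> refs = registry.queryDatasets(
        ...     "calexp",
        ...     collections=["HSC/runs/RC2"],
        ...     where="instrument = 'HSC' AND visit = 903334",
        ...     findFirst=True,
        ... )
        >>> for ref in refs:
        ...     print(ref.dataId)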

        """
        raise NotImplementedError()

1438 

1439 @abstractmethod 

1440 def queryDataIds( 

1441 self, 

1442 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1443 *, 

1444 dataId: Optional[DataId] = None, 

1445 datasets: Any = None, 

1446 collections: CollectionArgType | None = None, 

1447 where: str = "", 

1448 components: Optional[bool] = None, 

1449 bind: Optional[Mapping[str, Any]] = None, 

1450 check: bool = True, 

1451 **kwargs: Any, 

1452 ) -> DataCoordinateQueryResults: 

1453 """Query for data IDs matching user-provided criteria. 

1454 

1455 Parameters 

1456 ---------- 

1457 dimensions : `Dimension` or `str`, or iterable thereof 

1458 The dimensions of the data IDs to yield, as either `Dimension` 

1459 instances or `str`. Will be automatically expanded to a complete 

1460 `DimensionGraph`. 

1461 dataId : `dict` or `DataCoordinate`, optional 

1462 A data ID whose key-value pairs are used as equality constraints 

1463 in the query. 

1464 datasets : `Any`, optional 

1465 An expression that fully or partially identifies dataset types 

1466 that should constrain the yielded data IDs. For example, including 

1467 "raw" here would constrain the yielded ``instrument``, 

1468 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1469 those for which at least one "raw" dataset exists in 

1470 ``collections``. Allowed types include `DatasetType`, `str`, 

1471 and iterables thereof. Regular expression objects (i.e. 

1472 `re.Pattern`) are deprecated and will be removed after the v26 

1473 release. See :ref:`daf_butler_dataset_type_expressions` for more 

1474 information. 

1475 collections: `Any`, optional 

1476 An expression that identifies the collections to search for 

1477 datasets, such as a `str` (for full matches or partial matches 

1478 via globs), `re.Pattern` (for partial matches), or iterable 

1479 thereof. ``...`` can be used to search all collections (actually 

1480 just all `~CollectionType.RUN` collections, because this will 

1481 still find all datasets). If not provided, 

1482 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1483 is also passed. See :ref:`daf_butler_collection_expressions` for 

1484 more information. 

1485 where : `str`, optional 

1486 A string expression similar to a SQL WHERE clause. May involve 

1487 any column of a dimension table or (as a shortcut for the primary 

1488 key column of a dimension table) dimension name. See 

1489 :ref:`daf_butler_dimension_expressions` for more information. 

1490 components : `bool`, optional 

1491 If `True`, apply all dataset expression patterns to component 

1492 dataset type names as well. If `False`, never apply patterns to 

1493 components. If `None` (default), apply patterns to components only 

1494 if their parent datasets were not matched by the expression. 

1495 Fully-specified component datasets (`str` or `DatasetType` 

1496 instances) are always included. 

1497 

1498 Values other than `False` are deprecated, and only `False` will be 

1499 supported after v26. After v27 this argument will be removed 

1500 entirely. 

1501 bind : `Mapping`, optional 

1502 Mapping containing literal values that should be injected into the 

1503 ``where`` expression, keyed by the identifiers they replace. 

1504 check : `bool`, optional 

1505 If `True` (default) check the query for consistency before 

1506 executing it. This may reject some valid queries that resemble 

1507 common mistakes (e.g. queries for visits without specifying an 

1508 instrument). 

1509 **kwargs 

1510 Additional keyword arguments are forwarded to 

1511 `DataCoordinate.standardize` when processing the ``dataId`` 

1512 argument (and may be used to provide a constraining data ID even 

1513 when the ``dataId`` argument is `None`). 

1514 

1515 Returns 

1516 ------- 

1517 dataIds : `queries.DataCoordinateQueryResults` 

1518 Data IDs matching the given query parameters. These are guaranteed 

1519 to identify all dimensions (`DataCoordinate.hasFull` returns 

1520 `True`), but will not contain `DimensionRecord` objects 

1521 (`DataCoordinate.hasRecords` returns `False`). Call 

1522 `DataCoordinateQueryResults.expanded` on the returned object to 

1523 fetch those (and consider using 

1524 `DataCoordinateQueryResults.materialize` on the returned object 

1525 first if the expected number of rows is very large). See 

1526 documentation for those methods for additional information. 

1527 

1528 Raises 

1529 ------ 

1530 NoDefaultCollectionError 

1531 Raised if ``collections`` is `None` and 

1532 ``self.defaults.collections`` is `None`. 

1533 CollectionExpressionError 

1534 Raised when ``collections`` expression is invalid. 

1535 DataIdError 

1536 Raised when ``dataId`` or keyword arguments specify unknown 

1537 dimensions or values, or when they contain inconsistent values. 

1538 DatasetTypeExpressionError 

1539 Raised when ``datasets`` expression is invalid. 

1540 UserExpressionError 

1541 Raised when ``where`` expression is invalid. 

1542 """ 

1543 raise NotImplementedError() 
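
# A minimal sketch of a queryDataIds call, an illustrative addition
# assuming a configured ``registry``; the instrument name "HSC" and the
# visit cutoff are hypothetical. Bare dimension names in ``where`` are
# shortcuts for their primary keys, and ``inst``/``vmin`` are
# identifiers replaced via ``bind`` rather than embedded literals.
results = registry.queryDataIds(
    ["visit", "detector"],
    where="instrument = inst AND visit > vmin",
    bind={"inst": "HSC", "vmin": 900},
)
# expanded() attaches DimensionRecord objects to each data ID; for very
# large result sets, consider results.materialize() first (see above).
for data_id in results.expanded():
    print(data_id)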

1544 

1545 @abstractmethod 

1546 def queryDimensionRecords( 

1547 self, 

1548 element: Union[DimensionElement, str], 

1549 *, 

1550 dataId: Optional[DataId] = None, 

1551 datasets: Any = None, 

1552 collections: CollectionArgType | None = None, 

1553 where: str = "", 

1554 components: Optional[bool] = None, 

1555 bind: Optional[Mapping[str, Any]] = None, 

1556 check: bool = True, 

1557 **kwargs: Any, 

1558 ) -> DimensionRecordQueryResults: 

1559 """Query for dimension information matching user-provided criteria. 

1560 

1561 Parameters 

1562 ---------- 

1563 element : `DimensionElement` or `str` 

1564 The dimension element to obtain records for. 

1565 dataId : `dict` or `DataCoordinate`, optional 

1566 A data ID whose key-value pairs are used as equality constraints 

1567 in the query. 

1568 datasets : `Any`, optional 

1569 An expression that fully or partially identifies dataset types 

1570 that should constrain the yielded records. See `queryDataIds` and 

1571 :ref:`daf_butler_dataset_type_expressions` for more information. 

1572 collections : `Any`, optional 

1573 An expression that identifies the collections to search for 

1574 datasets, such as a `str` (for full matches or partial matches 

1575 via globs), `re.Pattern` (for partial matches), or iterable 

1576 thereof. ``...`` can be used to search all collections (actually 

1577 just all `~CollectionType.RUN` collections, because this will 

1578 still find all datasets). If not provided, 

1579 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1580 is also passed. See :ref:`daf_butler_collection_expressions` for 

1581 more information. 

1582 where : `str`, optional 

1583 A string expression similar to a SQL WHERE clause. See 

1584 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1585 information. 

1586 components : `bool`, optional 

1587 Whether to apply dataset expressions to components as well. 

1588 See `queryDataIds` for more information. 

1589 

1590 Values other than `False` are deprecated, and only `False` will be 

1591 supported after v26. After v27 this argument will be removed 

1592 entirely. 

1593 bind : `Mapping`, optional 

1594 Mapping containing literal values that should be injected into the 

1595 ``where`` expression, keyed by the identifiers they replace. 

1596 check : `bool`, optional 

1597 If `True` (default) check the query for consistency before 

1598 executing it. This may reject some valid queries that resemble 

1599 common mistakes (e.g. queries for visits without specifying an 

1600 instrument). 

1601 **kwargs 

1602 Additional keyword arguments are forwarded to 

1603 `DataCoordinate.standardize` when processing the ``dataId`` 

1604 argument (and may be used to provide a constraining data ID even 

1605 when the ``dataId`` argument is `None`). 

1606 

1607 Returns 

1608 ------- 

1609 records : `queries.DimensionRecordQueryResults` 

1610 Dimension records matching the given query parameters. 

1611 

1612 Raises 

1613 ------ 

1614 NoDefaultCollectionError 

1615 Raised if ``collections`` is `None` and 

1616 ``self.defaults.collections`` is `None`. 

1617 CollectionExpressionError 

1618 Raised when ``collections`` expression is invalid. 

1619 DataIdError 

1620 Raised when ``dataId`` or keyword arguments specify unknown 

1621 dimensions or values, or when they contain inconsistent values. 

1622 DatasetTypeExpressionError 

1623 Raised when ``datasets`` expression is invalid. 

1624 UserExpressionError 

1625 Raised when ``where`` expression is invalid. 

1626 """ 

1627 raise NotImplementedError() 
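
# A minimal sketch of fetching dimension records, an illustrative
# addition assuming a configured ``registry``; the element "detector"
# and the instrument "HSC" are hypothetical, and the available record
# fields depend on the dimension universe.
for record in registry.queryDimensionRecords(
    "detector",
    where="instrument = 'HSC'",
):
    print(record.dataId, record.toDict())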

1628 

1629 @abstractmethod 

1630 def queryDatasetAssociations( 

1631 self, 

1632 datasetType: Union[str, DatasetType], 

1633 collections: CollectionArgType | None = Ellipsis, 

1634 *, 

1635 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1636 flattenChains: bool = False, 

1637 ) -> Iterator[DatasetAssociation]: 

1638 """Iterate over dataset-collection combinations where the dataset is in 

1639 the collection. 

1640 

1641 This method is a temporary placeholder for better support for 

1642 association results in `queryDatasets`. It will probably be 

1643 removed in the future, and should be avoided in production code 

1644 whenever possible. 

1645 

1646 Parameters 

1647 ---------- 

1648 datasetType : `DatasetType` or `str` 

1649 A dataset type object or the name of one. 

1650 collections : `Any`, optional 

1651 An expression that identifies the collections to search for 

1652 datasets, such as a `str` (for full matches or partial matches 

1653 via globs), `re.Pattern` (for partial matches), or iterable 

1654 thereof. ``...`` can be used to search all collections (actually 

1655 just all `~CollectionType.RUN` collections, because this will still 

1656 find all datasets). If not provided, ``self.defaults.collections`` 

1657 is used. See :ref:`daf_butler_collection_expressions` for more 

1658 information. 

1659 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

1660 If provided, only yield associations from collections of these 

1661 types. 

1662 flattenChains : `bool`, optional 

1663 If `True`, search in the children of 

1664 `~CollectionType.CHAINED` collections. If `False` (default), 

1665 ``CHAINED`` collections are ignored. 

1666 

1667 Yields 

1668 ------ 

1669 association : `.DatasetAssociation` 

1670 Object representing the relationship between a single dataset and 

1671 a single collection. 

1672 

1673 Raises 

1674 ------ 

1675 NoDefaultCollectionError 

1676 Raised if ``collections`` is `None` and 

1677 ``self.defaults.collections`` is `None`. 

1678 CollectionExpressionError 

1679 Raised when ``collections`` expression is invalid. 

1680 """ 

1681 raise NotImplementedError() 
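
# A minimal sketch of iterating over associations, an illustrative
# addition assuming a configured ``registry``; the dataset type "bias"
# is hypothetical. Restricting to CALIBRATION collections illustrates
# that the association's timespan is only set for calibration
# collections.
from lsst.daf.butler import CollectionType

for assoc in registry.queryDatasetAssociations(
    "bias",
    collections=...,
    collectionTypes={CollectionType.CALIBRATION},
):
    print(assoc.collection, assoc.ref.dataId, assoc.timespan)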

1682 

1683 @property 

1684 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1685 """ObsCore manager instance for this registry (`ObsCoreTableManager` 

1686 or `None`). 

1687 

1688 The ObsCore manager may not be implemented for all registry 

1689 backends, and may not be enabled for a particular repository. 

1690 """ 

1691 return None 
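
# Because this property may be None for backends or repositories
# without ObsCore support, callers should guard before use; a minimal
# sketch, assuming a configured ``registry``:
manager = registry.obsCoreTableManager
if manager is not None:
    ...  # ObsCore-specific operations go here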

1692 

1693 storageClasses: StorageClassFactory 

1694 """All storage classes known to the registry (`StorageClassFactory`). 

1695 """ 

1696 

1697 datasetIdFactory: DatasetIdFactory 

1698 """Factory for dataset IDs."""