# python/lsst/daf/butler/registry/_registry.py (coverage: 78%, 143 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

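    # A minimal sketch (not part of the API) of how the trampoline is fed:
    # ``determineTrampoline`` reads the ``cls`` entry from the registry
    # configuration, e.g. a hypothetical ``registry.yaml`` fragment like
    #
    #     registry:
    #       cls: lsst.daf.butler.registries.sql.SqlRegistry
    #
    # and imports that class; the exact layout depends on the repository.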

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

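    # Usage sketch, assuming an already-initialized repository described by
    # a hypothetical "registry.yaml":
    #
    #     >>> registry = Registry.fromConfig("registry.yaml", writeable=False)
    #     >>> registry.isWriteable()
    #     False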

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and `False`
        otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

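    # Sketch of replacing the defaults struct wholesale (its components are
    # immutable, so the whole struct must be assigned); the collection and
    # run names here are hypothetical:
    #
    #     >>> registry.defaults = RegistryDefaults(
    #     ...     collections=["HSC/defaults"], run="u/someone/run"
    #     ... )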

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()

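    # Usage sketch: group writes so they commit or roll back together.  Note
    # that ``registerCollection``/``registerRun`` cannot run inside a
    # transaction (see their docstrings); the record below is illustrative
    # and incomplete:
    #
    #     >>> with registry.transaction(savepoint=True):
    #     ...     registry.insertDimensionData("instrument", {"name": "HSC"})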

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for this registry, if relevant.

        This operation can be used to reset connections to servers when using
        the registry with fork-based multiprocessing. This method should
        usually be called by the child process immediately after the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

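    # Sketch: register a TAGGED collection (the default type) and a CHAINED
    # one; the names are hypothetical:
    #
    #     >>> registry.registerCollection("u/someone/tagged")
    #     >>> registry.registerCollection("HSC/chain", CollectionType.CHAINED)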

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if a new run was registered, `False` if it already
            existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when
            the given chained collection is searched.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
        """
        raise NotImplementedError()

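    # Sketch: define a CHAINED collection that searches a run first and a
    # calibration collection second (all names hypothetical):
    #
    #     >>> registry.setCollectionChain("HSC/chain", ["u/someone/run", "HSC/calib"])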

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """
        Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

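    # Sketch: construct and register a dataset type.  The name, dimensions,
    # and storage class are illustrative; valid values depend on the
    # repository's dimension universe and storage class configuration:
    #
    #     >>> datasetType = DatasetType(
    #     ...     "calexp",
    #     ...     dimensions=("instrument", "visit", "detector"),
    #     ...     storageClass="ExposureF",
    #     ...     universe=registry.dimensions,
    #     ... )
    #     >>> registry.registerDatasetType(datasetType)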

    @abstractmethod
    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other butler processes that are
            active and have not seen the deletion.

        Parameters
        ----------
        name : `str` or `tuple` [ `str`, ... ]
            Name of the dataset type to be removed, or a tuple of such names.
            Wildcards are allowed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though
        most other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one. If this is a `DatasetType`
            instance, its storage class will be respected and propagated to
            the output, even if it differs from the dataset type definition
            in the registry, as long as the storage classes are convertible.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        MissingDatasetTypeError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched. This
        may make it harder to debug some lookup failures, but the behavior is
        intentional; we consider it more important that failed searches are
        reported consistently, regardless of the reason, and that adding
        additional collections that do not contain a match to the search path
        never changes the behavior.

        This method handles component dataset types automatically, though
        most other registry operations do not.
        """
        raise NotImplementedError()

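    # Sketch: look up a single dataset by data ID; the dataset type name,
    # collection, and data ID values are hypothetical:
    #
    #     >>> ref = registry.findDataset(
    #     ...     "calexp",
    #     ...     instrument="HSC", visit=903334, detector=16,
    #     ...     collections="HSC/runs/RC2",
    #     ... )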

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique
            IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
        """
        raise NotImplementedError()

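    # Sketch: insert new datasets for two data IDs into a RUN collection
    # (names and values hypothetical):
    #
    #     >>> refs = registry.insertDatasets(
    #     ...     "calexp",
    #     ...     dataIds=[
    #     ...         {"instrument": "HSC", "visit": 903334, "detector": 16},
    #     ...         {"instrument": "HSC", "visit": 903334, "detector": 17},
    #     ...     ],
    #     ...     run="u/someone/run",
    #     ... )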

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        Unlike `insertDatasets`, this method accepts `DatasetRef` instances,
        which should already be resolved and have a dataset ID. If the
        registry supports globally-unique dataset IDs (e.g. `uuid.UUID`),
        then datasets which already exist in the registry will be ignored if
        imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute, which will be used for
            the inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend, the IDs will be ignored and new
            IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs when IDs are not
            provided or their type does not match backend type. By default
            unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True`, force re-use of imported dataset IDs for integer IDs,
            which are normally generated as auto-incremented; an exception
            will be raised if imported IDs clash with existing ones. This
            option has no effect on the use of globally-unique IDs, which
            are always re-used (or generated if integer IDs are being
            imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not use
        this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        as having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a `DatasetRef` with the same exact ID is already in a collection,
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

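    # Sketch: tag already-registered datasets into a TAGGED collection and
    # untag them again; ``refs`` stands for resolved references obtained
    # from, e.g., `queryDatasets`:
    #
    #     >>> registry.associate("u/someone/tagged", refs)
    #     >>> registry.disassociate("u/someone/tagged", refs)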

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`, optional
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

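    # Sketch: certify bias datasets into a CALIBRATION collection for an
    # explicit validity range (collection name and dates hypothetical):
    #
    #     >>> from astropy.time import Time
    #     >>> timespan = Timespan(Time("2023-01-01", scale="tai"),
    #     ...                     Time("2023-06-01", scale="tai"))
    #     >>> registry.certify("HSC/calib", bias_refs, timespan)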

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [ `str`, `DimensionRecord` ], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.

        Notes
        -----
        This method cannot be relied upon to reject invalid data ID values
        for dimensions that do not actually have any record columns. For
        efficiency reasons the records for these dimensions (which have only
        dimension key values that are given by the caller) may be constructed
        directly rather than obtained from the registry database.
        """
        raise NotImplementedError()

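    # Sketch: expand a minimal data ID so it carries full dimension records
    # (values hypothetical):
    #
    #     >>> dataId = registry.expandDataId(instrument="HSC", visit=903334, detector=16)
    #     >>> dataId.hasFull() and dataId.hasRecords()
    #     True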

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` contains one or more `DimensionRecord`
            instances of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used when
            this is a concern.
        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            the record will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

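    # Sketch: idempotently ensure a dimension record exists; a real
    # ``instrument`` record typically carries additional fields:
    #
    #     >>> registry.syncDimensionData("instrument", {"name": "HSC"})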

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Returns
        -------
        dataset_types : `Iterable` [ `DatasetType` ]
            An `Iterable` of `DatasetType` instances whose names match
            ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Returns
        -------
        collections : `Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which their child lists appear is
        unspecified, and the lists of children may be incomplete if a child
        has multiple parents.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.default.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
        """
        raise NotImplementedError()

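    # Sketch: find-first query for one dataset type across a chained
    # collection, constrained by a string expression (values hypothetical):
    #
    #     >>> refs = registry.queryDatasets(
    #     ...     "calexp",
    #     ...     collections="HSC/chain",
    #     ...     where="instrument = 'HSC' AND visit = 903334",
    #     ...     findFirst=True,
    #     ... )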

1433 @abstractmethod 

1434 def queryDataIds( 

1435 self, 

1436 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1437 *, 

1438 dataId: Optional[DataId] = None, 

1439 datasets: Any = None, 

1440 collections: Any = None, 

1441 where: str = "", 

1442 components: Optional[bool] = None, 

1443 bind: Optional[Mapping[str, Any]] = None, 

1444 check: bool = True, 

1445 **kwargs: Any, 

1446 ) -> DataCoordinateQueryResults: 

1447 """Query for data IDs matching user-provided criteria. 

1448 

1449 Parameters 

1450 ---------- 

1451 dimensions : `Dimension` or `str`, or iterable thereof 

1452 The dimensions of the data IDs to yield, as either `Dimension` 

1453 instances or `str`. Will be automatically expanded to a complete 

1454 `DimensionGraph`. 

1455 dataId : `dict` or `DataCoordinate`, optional 

1456 A data ID whose key-value pairs are used as equality constraints 

1457 in the query. 

1458 datasets : `Any`, optional 

1459 An expression that fully or partially identifies dataset types 

1460 that should constrain the yielded data IDs. For example, including 

1461 "raw" here would constrain the yielded ``instrument``, 

1462 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1463 those for which at least one "raw" dataset exists in 

1464 ``collections``. Allowed types include `DatasetType`, `str`, 

1465 and iterables thereof. Regular expression objects (i.e. 

1466 `re.Pattern`) are deprecated and will be removed after the v26 

1467 release. See :ref:`daf_butler_dataset_type_expressions` for more 

1468 information. 

1469 collections: `Any`, optional 

1470 An expression that identifies the collections to search for 

1471 datasets, such as a `str` (for full matches or partial matches 

1472 via globs), `re.Pattern` (for partial matches), or iterable 

1473 thereof. ``...`` can be used to search all collections (actually 

1474 just all `~CollectionType.RUN` collections, because this will 

1475 still find all datasets). If not provided, 

1476 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1477 is also passed. See :ref:`daf_butler_collection_expressions` for 

1478 more information. 

1479 where : `str`, optional 

1480 A string expression similar to a SQL WHERE clause. May involve 

1481 any column of a dimension table or (as a shortcut for the primary 

1482 key column of a dimension table) dimension name. See 

1483 :ref:`daf_butler_dimension_expressions` for more information. 

1484 components : `bool`, optional 

1485 If `True`, apply all dataset expression patterns to component 

1486 dataset type names as well. If `False`, never apply patterns to 

1487 components. If `None` (default), apply patterns to components only 

1488 if their parent datasets were not matched by the expression. 

1489 Fully-specified component datasets (`str` or `DatasetType` 

1490 instances) are always included. 

1491 

1492 Values other than `False` are deprecated, and only `False` will be 

1493 supported after v26. After v27 this argument will be removed 

1494 entirely. 

1495 bind : `Mapping`, optional 

1496 Mapping containing literal values that should be injected into the 

1497 ``where`` expression, keyed by the identifiers they replace. 

1498 check : `bool`, optional 

1499 If `True` (default) check the query for consistency before 

1500 executing it. This may reject some valid queries that resemble 

1501 common mistakes (e.g. queries for visits without specifying an 

1502 instrument). 

1503 **kwargs 

1504 Additional keyword arguments are forwarded to 

1505 `DataCoordinate.standardize` when processing the ``dataId`` 

1506 argument (and may be used to provide a constraining data ID even 

1507 when the ``dataId`` argument is `None`). 

1508 

1509 Returns 

1510 ------- 

1511 dataIds : `queries.DataCoordinateQueryResults` 

1512 Data IDs matching the given query parameters. These are guaranteed 

1513 to identify all dimensions (`DataCoordinate.hasFull` returns 

1514 `True`), but will not contain `DimensionRecord` objects 

1515 (`DataCoordinate.hasRecords` returns `False`). Call 

1516 `DataCoordinateQueryResults.expanded` on the returned object to 

1517 fetch those (and consider using 

1518 `DataCoordinateQueryResults.materialize` on the returned object 

1519 first if the expected number of rows is very large). See 

1520 documentation for those methods for additional information. 

1521 

1522 Raises 

1523 ------ 

1524 NoDefaultCollectionError 

1525 Raised if ``collections`` is `None` and 

1526 ``self.defaults.collections`` is `None`. 

1527 CollectionExpressionError 

1528 Raised when ``collections`` expression is invalid. 

1529 DataIdError 

1530 Raised when ``dataId`` or keyword arguments specify unknown 

1531 dimensions or values, or when they contain inconsistent values. 

1532 DatasetTypeExpressionError 

1533 Raised when ``datasets`` expression is invalid. 

1534 UserExpressionError 

1535 Raised when ``where`` expression is invalid. 
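 

 Examples 

 -------- 

 A minimal sketch rather than a normative recipe: the ``registry`` 

 instance, the ``calexp`` dataset type, the collection name, and the 

 data ID values below are all hypothetical:: 

     dataIds = registry.queryDataIds( 

         ["visit", "detector"], 

         datasets="calexp", 

         collections="HSC/runs/example", 

         # Bind identifiers keep literal values out of the expression. 

         where="instrument = instr AND visit > min_visit", 

         bind={"instr": "HSC", "min_visit": 100}, 

     ) 

     for dataId in dataIds.expanded(): 

         print(dataId["visit"], dataId["detector"]) 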

1536 """ 

1537 raise NotImplementedError() 

1538 

1539 @abstractmethod 

1540 def queryDimensionRecords( 

1541 self, 

1542 element: Union[DimensionElement, str], 

1543 *, 

1544 dataId: Optional[DataId] = None, 

1545 datasets: Any = None, 

1546 collections: Any = None, 

1547 where: str = "", 

1548 components: Optional[bool] = None, 

1549 bind: Optional[Mapping[str, Any]] = None, 

1550 check: bool = True, 

1551 **kwargs: Any, 

1552 ) -> DimensionRecordQueryResults: 

1553 """Query for dimension information matching user-provided criteria. 

1554 

1555 Parameters 

1556 ---------- 

1557 element : `DimensionElement` or `str` 

1558 The dimension element to obtain records for. 

1559 dataId : `dict` or `DataCoordinate`, optional 

1560 A data ID whose key-value pairs are used as equality constraints 

1561 in the query. 

1562 datasets : `Any`, optional 

1563 An expression that fully or partially identifies dataset types 

1564 that should constrain the yielded records. See `queryDataIds` and 

1565 :ref:`daf_butler_dataset_type_expressions` for more information. 

1566 collections : `Any`, optional 

1567 An expression that identifies the collections to search for 

1568 datasets, such as a `str` (for full matches or partial matches 

1569 via globs), `re.Pattern` (for partial matches), or iterable 

1570 thereof. ``...`` can be used to search all collections (actually 

1571 just all `~CollectionType.RUN` collections, because this will 

1572 still find all datasets). If not provided, 

1573 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1574 is also passed. See :ref:`daf_butler_collection_expressions` for 

1575 more information. 

1576 where : `str`, optional 

1577 A string expression similar to a SQL WHERE clause. See 

1578 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1579 information. 

1580 components : `bool`, optional 

1581 Whether to apply dataset expressions to components as well. 

1582 See `queryDataIds` for more information. 

1583 

1584 Values other than `False` are deprecated, and only `False` will be 

1585 supported after v26. After v27 this argument will be removed 

1586 entirely. 

1587 bind : `Mapping`, optional 

1588 Mapping containing literal values that should be injected into the 

1589 ``where`` expression, keyed by the identifiers they replace. 

1590 check : `bool`, optional 

1591 If `True` (default) check the query for consistency before 

1592 executing it. This may reject some valid queries that resemble 

1593 common mistakes (e.g. queries for visits without specifying an 

1594 instrument). 

1595 **kwargs 

1596 Additional keyword arguments are forwarded to 

1597 `DataCoordinate.standardize` when processing the ``dataId`` 

1598 argument (and may be used to provide a constraining data ID even 

1599 when the ``dataId`` argument is `None`). 

1600 

1601 Returns 

1602 ------- 

1603 records : `queries.DimensionRecordQueryResults` 

1604 Dimension records matching the given query parameters. 

1605 

1606 Raises 

1607 ------ 

1608 NoDefaultCollectionError 

1609 Raised if ``collections`` is `None` and 

1610 ``self.defaults.collections`` is `None`. 

1611 CollectionExpressionError 

1612 Raised when ``collections`` expression is invalid. 

1613 DataIdError 

1614 Raised when ``dataId`` or keyword arguments specify unknown 

1615 dimensions or values, or when they contain inconsistent values. 

1616 DatasetTypeExpressionError 

1617 Raised when ``datasets`` expression is invalid. 

1618 UserExpressionError 

1619 Raised when ``where`` expression is invalid. 
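 

 Examples 

 -------- 

 A minimal sketch; the ``registry`` instance is hypothetical and the 

 record fields available depend on the configured dimension universe:: 

     records = registry.queryDimensionRecords( 

         "detector", 

         where="instrument = instr", 

         bind={"instr": "HSC"}, 

     ) 

     for record in records: 

         # ``id`` is the detector primary key in the default universe. 

         print(record.id) 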

1620 """ 

1621 raise NotImplementedError() 

1622 

1623 @abstractmethod 

1624 def queryDatasetAssociations( 

1625 self, 

1626 datasetType: Union[str, DatasetType], 

1627 collections: Any = ..., 

1628 *, 

1629 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1630 flattenChains: bool = False, 

1631 ) -> Iterator[DatasetAssociation]: 

1632 """Iterate over dataset-collection combinations where the dataset is in 

1633 the collection. 

1634 

1635 This method is a temporary placeholder, pending better support for 

1636 association results in `queryDatasets`. It will probably be 

1637 removed in the future, and should be avoided in production code 

1638 whenever possible. 

1639 

1640 Parameters 

1641 ---------- 

1642 datasetType : `DatasetType` or `str` 

1643 A dataset type object or the name of one. 

1644 collections : `Any`, optional 

1645 An expression that identifies the collections to search for 

1646 datasets, such as a `str` (for full matches or partial matches 

1647 via globs), `re.Pattern` (for partial matches), or iterable 

1648 thereof. ``...`` can be used to search all collections (actually 

1649 just all `~CollectionType.RUN` collections, because this will still 

1650 find all datasets). If not provided, ``self.defaults.collections`` 

1651 is used. See :ref:`daf_butler_collection_expressions` for more 

1652 information. 

1653 collectionTypes : `Iterable` [ `CollectionType` ], optional 

1654 If provided, only yield associations from collections of these 

1655 types. 

1656 flattenChains : `bool`, optional 

1657 If `True`, search in the children of 

1658 `~CollectionType.CHAINED` collections. If `False` (default), 

1659 ``CHAINED`` collections are ignored. 

1660 

1661 Yields 

1662 ------ 

1663 association : `.DatasetAssociation` 

1664 Object representing the relationship between a single dataset and 

1665 a single collection. 

1666 

1667 Raises 

1668 ------ 

1669 NoDefaultCollectionError 

1670 Raised if ``collections`` is `None` and 

1671 ``self.defaults.collections`` is `None`. 

1672 CollectionExpressionError 

1673 Raised when ``collections`` expression is invalid. 
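 

 Examples 

 -------- 

 A minimal sketch; the dataset type name and collection below are 

 hypothetical:: 

     for assoc in registry.queryDatasetAssociations( 

         "calexp", 

         collections="HSC/tagged/example", 

         collectionTypes={CollectionType.TAGGED}, 

     ): 

         print(assoc.ref, assoc.collection) 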

1674 """ 

1675 raise NotImplementedError() 

1676 

1677 @property 

1678 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1679 """ObsCore manager instance for this registry (`ObsCoreTableManager` 

1680 or `None`). 

1681 

1682 The ObsCore manager may not be implemented by all registry backends, or 

1683 may not be enabled for a given repository. 
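 

 Examples 

 -------- 

 A minimal sketch: callers should guard against the manager being 

 absent before using it:: 

     if registry.obsCoreTableManager is not None: 

         ...  # ObsCore is configured for this repository. 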

1684 """ 

1685 return None 

1686 

1687 storageClasses: StorageClassFactory 

1688 """All storage classes known to the registry (`StorageClassFactory`). 

1689 """ 

1690 

1691 datasetIdFactory: DatasetIdFactory 

1692 """Factory for dataset IDs."""