Coverage for python/lsst/daf/butler/registry/_registry.py: 78%

148 statements  

coverage.py v7.2.7, created at 2023-06-14 09:11 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
import re
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator, Mapping, Sequence
from types import EllipsisType
from typing import TYPE_CHECKING, Any

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)
# Type alias for `collections` arguments.
CollectionArgType = str | re.Pattern | Iterable[str | re.Pattern] | EllipsisType | CollectionWildcard


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `~lsst.daf.butler.registry.RegistryDefaults` in
    a ``_defaults`` property. No other properties are assumed shared between
    implementations.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: ButlerConfig | RegistryConfig | Config | str | None
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
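
        Examples
        --------
        A minimal sketch of typical use; passing `None` loads the default
        configuration::

            config = Registry.forceRegistryConfig(None)
            assert isinstance(config, RegistryConfig)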

        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: ButlerConfig | RegistryConfig | Config | str | None
    ) -> tuple[type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Cannot instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> Registry:
        """Create registry database and return `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it cannot create a registry.
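
        Examples
        --------
        A sketch of creating a new SQLite-backed registry database (the
        connection string is illustrative)::

            config = RegistryConfig()
            config["db"] = "sqlite:///some/path/gen3.sqlite3"
            registry = Registry.createFromConfig(config)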

        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: ButlerConfig | RegistryConfig | Config | str,
        butlerRoot: ResourcePathExpression | None = None,
        writeable: bool = True,
        defaults: RegistryDefaults | None = None,
    ) -> Registry:
        """Create `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
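
        Examples
        --------
        A sketch of opening an existing repository read-only (the
        configuration path is illustrative)::

            registry = Registry.fromConfig("/some/path/butler.yaml", writeable=False)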

        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and `False`
        otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their `transaction`
        context manager methods do not reflect this), and must be used with
        care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`~lsst.daf.butler.registry.RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
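
        Examples
        --------
        A sketch of replacing the defaults wholesale (the collection names
        are illustrative)::

            registry.defaults = RegistryDefaults(
                collections=["HSC/defaults"], run="u/someone/run"
            )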

        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction.
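
        Examples
        --------
        A sketch of grouping writes so they commit or roll back together
        (the record values are illustrative)::

            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "DummyCam"})
        """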

        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for this registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
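
        Examples
        --------
        A sketch of registering a tagged collection (the name is
        illustrative)::

            registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)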

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        lsst.daf.butler.registry.MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: str | None = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        lsst.daf.butler.registry.MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.exc.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it will be removed from the `Registry` database. This requires that
        those datasets be removed (or at least trashed) from any datastores
        that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `~collections.abc.Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when the
            given chained collection is searched.

        Raises
        ------
        lsst.daf.butler.registry.MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : collection expression
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        lsst.daf.butler.registry.MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
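
        Examples
        --------
        A sketch of defining a chain that searches a user collection before
        a shared one (the collection names are illustrative)::

            registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
            registry.setCollectionChain("u/someone/chain", ["u/someone/run", "HSC/defaults"])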

        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> set[str]:
        """Return the CHAINED collections that directly contain the given one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace any
            existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `~lsst.daf.butler.registry.CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if this `DatasetType` is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result in
            unexpected behavior from other butler processes that are active
            and have not seen the deletion.

        Parameters
        ----------
        name : `str` or `tuple` [`str`]
            Name of the type to be removed, or a tuple of type names to be
            removed. Wildcards are allowed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type definition
            when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        lsst.daf.butler.registry.MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though most
        other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: DatasetType | str,
        dataId: DataId | None = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef | None:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and cannot
        be found using the provided dataset type, a dataset ref for the parent
        will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one. If this is a `DatasetType`
            instance, its storage class will be respected and propagated to
            the output, even if it differs from the dataset type definition
            in the registry, as long as the storage classes are convertible.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that identify
            the dataset within a collection.
        collections : collection expression, optional
            An expression that fully or partially identifies the collections to
            search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        lsst.daf.butler.registry.NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        lsst.daf.butler.registry.MissingDatasetTypeError
            Raised if the dataset type does not exist.
        lsst.daf.butler.registry.MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception even
        when the set of collections searched is intrinsically incompatible with
        the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
        only `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the behavior
        is intentional; we consider it more important that failed searches are
        reported consistently, regardless of the reason, and that adding
        additional collections that do not contain a match to the search path
        never changes the behavior.

        This method handles component dataset types automatically, though most
        other registry operations do not.
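
        Examples
        --------
        A sketch of looking up a single dataset (the dataset type, data ID
        values, and collection name are illustrative)::

            ref = registry.findDataset(
                "calexp",
                instrument="HSC",
                visit=903334,
                detector=16,
                collections=["HSC/runs/RC2"],
            )
            if ref is None:
                print("No matching dataset found.")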

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
        dataIds: Iterable[DataId],
        run: str | None = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> list[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This is
            necessary in general to allow datastore to generate file templates,
            but it may be disabled if the caller can guarantee this is
            unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique IDs
            are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the same
            order).

        Raises
        ------
        lsst.daf.butler.registry.DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        lsst.daf.butler.registry.NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        lsst.daf.butler.registry.ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of those
            given already exists in ``run``.
        lsst.daf.butler.registry.MissingCollectionError
            Raised if ``run`` does not exist in the registry.
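
        Examples
        --------
        A sketch of registering new datasets for two data IDs (all names
        and values are illustrative)::

            refs = registry.insertDatasets(
                "raw",
                dataIds=[
                    {"instrument": "HSC", "exposure": 903334, "detector": 16},
                    {"instrument": "HSC", "exposure": 903334, "detector": 17},
                ],
                run="HSC/raw/all",
            )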

        """
        raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> list[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`) then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and new
            IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This is
            necessary in general to allow datastore to generate file templates,
            but it may be disabled if the caller can guarantee this is
            unnecessary.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the same
            order). If any of ``datasets`` has an ID which already exists in
            the database then it will not be inserted or updated, but a
            resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        lsst.daf.butler.registry.NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        lsst.daf.butler.registry.DatasetTypeError
            Raised if datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        lsst.daf.butler.registry.ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of those
            given already exists in ``run``.
        lsst.daf.butler.registry.MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not use
        this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> DatasetRef | None:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections, and
        any `Quantum` that consumed this dataset will instead be marked with
        having a NULL input. `Datastore` records will *not* be deleted; the
        caller is responsible for ensuring that the dataset has already been
        removed from all Datastores.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [`DatasetRef`]
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon return.

        Raises
        ------
        lsst.daf.butler.AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with different ID exists in the collection,
        `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        lsst.daf.butler.registry.MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED` collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        lsst.daf.butler.AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        lsst.daf.butler.registry.MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        lsst.daf.butler.AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        lsst.daf.butler.registry.CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
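
        Examples
        --------
        A sketch of certifying already-registered bias datasets for a
        validity range (all names and dates are illustrative)::

            from astropy.time import Time

            timespan = Timespan(
                begin=Time("2023-01-01T00:00:00", scale="tai"),
                end=Time("2023-06-01T00:00:00", scale="tai"),
            )
            registry.certify("HSC/calib", bias_refs, timespan)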

        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: str | DatasetType,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataId] | None = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : iterable [`dict` or `DataCoordinate`], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        lsst.daf.butler.registry.CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `~.interfaces.DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        lsst.daf.butler.AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: DataId | None = None,
        *,
        graph: DimensionGraph | None = None,
        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \
            optional
            Dimension record data to use before querying the database for that
            data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs for
            ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions it
            identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        lsst.daf.butler.registry.DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.

        Notes
        -----
        This method cannot be relied upon to reject invalid data ID values
        for dimensions that do not actually have any record columns. For
        efficiency reasons the records for these dimensions (which have only
        dimension key values that are given by the caller) may be constructed
        directly rather than obtained from the registry database.
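
        Examples
        --------
        A sketch of expanding a minimal data ID so that dimension records
        are attached (the values are illustrative)::

            dataId = registry.expandDataId(instrument="HSC", exposure=903334)
            assert dataId.hasRecords() and dataId.hasFull()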

        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: DimensionElement | str,
        *data: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        *data : `dict` or `DimensionRecord`
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``data`` contains one or more `DimensionRecord` instances of the
            appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used when
            this is a concern.
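
        Examples
        --------
        A sketch of inserting a detector record (the field values are
        illustrative and may be incomplete for a real dimension universe)::

            registry.insertDimensionData(
                "detector",
                {"instrument": "DummyCam", "id": 1, "full_name": "RXX_S00"},
            )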

        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: DimensionElement | str,
        row: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        """Synchronize the given dimension record with the database, inserting
        if it does not already exist and comparing values if it does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``row`` is a `DimensionRecord` instance of the appropriate
            subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: bool | None = None,
        missing: list[str] | None = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : dataset type expression, optional
            An expression that fully or partially identifies the dataset types
            to return, such as a `str`, `re.Pattern`, or iterable thereof.
            ``...`` can be used to return all dataset types, and is the
            default. See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.

        Returns
        -------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType`]
            An `~collections.abc.Iterable` of `DatasetType` instances whose
            names match ``expression``.

        Raises
        ------
        lsst.daf.butler.registry.DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
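
        Examples
        --------
        A sketch of matching dataset types by pattern (the pattern is
        illustrative)::

            for datasetType in registry.queryDatasetTypes(re.compile("^calexp")):
                print(datasetType.name)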

        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: DatasetType | None = None,
        collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: bool | None = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : collection expression, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it may
            yield collections that do not have any such datasets.
        collectionTypes : `~collections.abc.Set` [`CollectionType`] or \
            `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``: include
            either CHAINED collections or their children, but not both.

        Returns
        -------
        collections : `~collections.abc.Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        lsst.daf.butler.registry.CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except that
        the children of a `~CollectionType.CHAINED` collection are guaranteed
        to be in the order in which they are searched. When multiple parent
        `~CollectionType.CHAINED` collections match the same criteria, the
        order in which the two lists appear is unspecified, and the lists of
        children may be incomplete if a child has multiple parents.
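
        Examples
        --------
        A sketch of listing all `~CollectionType.RUN` collections matching
        a glob (the glob is illustrative)::

            for name in registry.queryCollections(
                "HSC/runs/*", collectionTypes=CollectionType.RUN
            ):
                print(name)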

        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Iterable[Dimension | str] | None = None,
        dataId: DataId | None = None,
        where: str = "",
        findFirst: bool = False,
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching user-provided
        criteria.

        Parameters
        ----------
        datasetType : dataset type expression
            An expression that fully or partially identifies the dataset types
            to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...`` can
            be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : collection expression, optional
            An expression that identifies the collections to search, such as a
            `str` (for full matches or partial matches via globs), `re.Pattern`
            (for partial matches), or iterable thereof. ``...`` can be used to
            search all collections (actually just all `~CollectionType.RUN`
            collections, because this will still find all datasets).
            If not provided, ``self.default.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
            Values of collection type can be expanded in some cases; see
            :ref:`daf_butler_dimension_expressions_identifiers` for more
            information.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `.queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested data
            IDs are guaranteed to include values for all implied dimensions
            (i.e. `DataCoordinate.hasFull` will return `True`), but will not
            include dimension records (`DataCoordinate.hasRecords` will be
            `False`) unless `~.queries.DatasetQueryResults.expanded` is
            called on the result object (which returns a new one).

        Raises
        ------
        lsst.daf.butler.registry.DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections`` is
            also `None`.
        lsst.daf.butler.registry.DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        lsst.daf.butler.registry.UserExpressionError
            Raised when ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
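
        Examples
        --------
        A sketch of a find-first query constrained by a ``where`` expression
        (all names and values are illustrative)::

            refs = registry.queryDatasets(
                "calexp",
                collections=["HSC/runs/RC2"],
                where="instrument = 'HSC' AND visit = 903334",
                findFirst=True,
            )
            for ref in refs:
                print(ref.dataId)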

        """
        raise NotImplementedError()
1415 

1416 @abstractmethod 

1417 def queryDataIds( 

1418 self, 

1419 dimensions: Iterable[Dimension | str] | Dimension | str, 

1420 *, 

1421 dataId: DataId | None = None, 

1422 datasets: Any = None, 

1423 collections: CollectionArgType | None = None, 

1424 where: str = "", 

1425 components: bool | None = None, 

1426 bind: Mapping[str, Any] | None = None, 

1427 check: bool = True, 

1428 **kwargs: Any, 

1429 ) -> DataCoordinateQueryResults: 

1430 """Query for data IDs matching user-provided criteria. 

1431 

1432 Parameters 

1433 ---------- 

1434 dimensions : `Dimension` or `str`, or iterable thereof 

1435 The dimensions of the data IDs to yield, as either `Dimension` 

1436 instances or `str`. Will be automatically expanded to a complete 

1437 `DimensionGraph`. 

1438 dataId : `dict` or `DataCoordinate`, optional 

1439 A data ID whose key-value pairs are used as equality constraints 

1440 in the query. 

1441 datasets : dataset type expression, optional 

1442 An expression that fully or partially identifies dataset types 

1443 that should constrain the yielded data IDs. For example, including 

1444 "raw" here would constrain the yielded ``instrument``, 

1445 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1446 those for which at least one "raw" dataset exists in 

1447 ``collections``. Allowed types include `DatasetType`, `str`, 

1448 and iterables thereof. Regular expression objects (i.e. 

1449 `re.Pattern`) are deprecated and will be removed after the v26 

1450 release. See :ref:`daf_butler_dataset_type_expressions` for more 

1451 information. 

1452 collections : collection expression, optional 

1453 An expression that identifies the collections to search for 

1454 datasets, such as a `str` (for full matches or partial matches 

1455 via globs), `re.Pattern` (for partial matches), or iterable 

1456 thereof. ``...`` can be used to search all collections (actually 

1457 just all `~CollectionType.RUN` collections, because this will 

1458 still find all datasets). If not provided, 

1459 ``self.default.collections`` is used. Ignored unless ``datasets`` 

1460 is also passed. See :ref:`daf_butler_collection_expressions` for 

1461 more information. 

1462 where : `str`, optional 

1463 A string expression similar to a SQL WHERE clause. May involve 

1464 any column of a dimension table or (as a shortcut for the primary 

1465 key column of a dimension table) dimension name. See 

1466 :ref:`daf_butler_dimension_expressions` for more information. 

1467 components : `bool`, optional 

1468 If `True`, apply all dataset expression patterns to component 

1469 dataset type names as well. If `False`, never apply patterns to 

1470 components. If `None` (default), apply patterns to components only 

1471 if their parent datasets were not matched by the expression. 

1472 Fully-specified component datasets (`str` or `DatasetType` 

1473 instances) are always included. 

1474 

1475 Values other than `False` are deprecated, and only `False` will be 

1476 supported after v26. After v27 this argument will be removed 

1477 entirely. 

1478 bind : `~collections.abc.Mapping`, optional 

1479 Mapping containing literal values that should be injected into the 

1480 ``where`` expression, keyed by the identifiers they replace. 

1481 Values of collection type can be expanded in some cases; see 

1482 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1483 information. 

1484 check : `bool`, optional 

1485 If `True` (default) check the query for consistency before 

1486 executing it. This may reject some valid queries that resemble 

1487 common mistakes (e.g. queries for visits without specifying an 

1488 instrument). 

1489 **kwargs 

1490 Additional keyword arguments are forwarded to 

1491 `DataCoordinate.standardize` when processing the ``dataId`` 

1492 argument (and may be used to provide a constraining data ID even 

1493 when the ``dataId`` argument is `None`). 

1494 

1495 Returns 

1496 ------- 

1497 dataIds : `.queries.DataCoordinateQueryResults` 

1498 Data IDs matching the given query parameters. These are guaranteed 

1499 to identify all dimensions (`DataCoordinate.hasFull` returns 

1500 `True`), but will not contain `DimensionRecord` objects 

1501 (`DataCoordinate.hasRecords` returns `False`). Call 

1502 `~.queries.DataCoordinateQueryResults.expanded` on the 

1503 returned object to fetch those (and consider using 

1504 `~.queries.DataCoordinateQueryResults.materialize` on the 

1505 returned object first if the expected number of rows is very 

1506 large). See documentation for those methods for additional 

1507 information. 

1508 

1509 Raises 

1510 ------ 

1511 lsst.daf.butler.registry.NoDefaultCollectionError 

1512 Raised if ``collections`` is `None` and 

1513 ``self.defaults.collections`` is `None`. 

1514 lsst.daf.butler.registry.CollectionExpressionError 

1515 Raised when ``collections`` expression is invalid. 

1516 lsst.daf.butler.registry.DataIdError 

1517 Raised when ``dataId`` or keyword arguments specify unknown 

1518 dimensions or values, or when they contain inconsistent values. 

1519 lsst.daf.butler.registry.DatasetTypeExpressionError 

1520 Raised when the ``datasets`` expression is invalid. 

1521 lsst.daf.butler.registry.UserExpressionError 

1522 Raised when ``where`` expression is invalid. 
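
        Examples
        --------
        A minimal sketch, with hypothetical instrument and visit values; the
        bare ``inst`` identifier in ``where`` is replaced by the value
        supplied via ``bind``::

            dataIds = registry.queryDataIds(
                ["visit", "detector"],
                where="instrument = inst AND visit > 100",
                bind={"inst": "HSC"},
            )
            # expanded() attaches DimensionRecords; for very large result
            # sets, consider calling materialize() first.
            for dataId in dataIds.expanded():
                print(dataId)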

1523 """ 

1524 raise NotImplementedError() 

1525 

1526 @abstractmethod 

1527 def queryDimensionRecords( 

1528 self, 

1529 element: DimensionElement | str, 

1530 *, 

1531 dataId: DataId | None = None, 

1532 datasets: Any = None, 

1533 collections: CollectionArgType | None = None, 

1534 where: str = "", 

1535 components: bool | None = None, 

1536 bind: Mapping[str, Any] | None = None, 

1537 check: bool = True, 

1538 **kwargs: Any, 

1539 ) -> DimensionRecordQueryResults: 

1540 """Query for dimension information matching user-provided criteria. 

1541 

1542 Parameters 

1543 ---------- 

1544 element : `DimensionElement` or `str` 

1545 The dimension element to obtain records for. 

1546 dataId : `dict` or `DataCoordinate`, optional 

1547 A data ID whose key-value pairs are used as equality constraints 

1548 in the query. 

1549 datasets : dataset type expression, optional 

1550 An expression that fully or partially identifies dataset types 

1551 that should constrain the yielded records. See `queryDataIds` and 

1552 :ref:`daf_butler_dataset_type_expressions` for more information. 

1553 collections : collection expression, optional 

1554 An expression that identifies the collections to search for 

1555 datasets, such as a `str` (for full matches or partial matches 

1556 via globs), `re.Pattern` (for partial matches), or iterable 

1557 thereof. ``...`` can be used to search all collections (actually 

1558 just all `~CollectionType.RUN` collections, because this will 

1559 still find all datasets). If not provided, 

1560 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

1561 is also passed. See :ref:`daf_butler_collection_expressions` for 

1562 more information. 

1563 where : `str`, optional 

1564 A string expression similar to a SQL WHERE clause. See 

1565 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1566 information. 

1567 components : `bool`, optional 

1568 Whether to apply dataset expressions to components as well. 

1569 See `queryDataIds` for more information. 

1570 

1571 Values other than `False` are deprecated, and only `False` will be 

1572 supported after v26. After v27 this argument will be removed 

1573 entirely. 

1574 bind : `~collections.abc.Mapping`, optional 

1575 Mapping containing literal values that should be injected into the 

1576 ``where`` expression, keyed by the identifiers they replace. 

1577 Values of collection type can be expanded in some cases; see 

1578 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1579 information. 

1580 check : `bool`, optional 

1581 If `True` (default) check the query for consistency before 

1582 executing it. This may reject some valid queries that resemble 

1583 common mistakes (e.g. queries for visits without specifying an 

1584 instrument). 

1585 **kwargs 

1586 Additional keyword arguments are forwarded to 

1587 `DataCoordinate.standardize` when processing the ``dataId`` 

1588 argument (and may be used to provide a constraining data ID even 

1589 when the ``dataId`` argument is `None`). 

1590 

1591 Returns 

1592 ------- 

1593 records : `.queries.DimensionRecordQueryResults` 

1594 Dimension records matching the given query parameters. 

1595 

1596 Raises 

1597 ------ 

1598 lsst.daf.butler.registry.NoDefaultCollectionError 

1599 Raised if ``collections`` is `None` and 

1600 ``self.defaults.collections`` is `None`. 

1601 lsst.daf.butler.registry.CollectionExpressionError 

1602 Raised when ``collections`` expression is invalid. 

1603 lsst.daf.butler.registry.DataIdError 

1604 Raised when ``dataId`` or keyword arguments specify unknown 

1605 dimensions or values, or when they contain inconsistent values. 

1606 lsst.daf.butler.registry.DatasetTypeExpressionError 

1607 Raised when the ``datasets`` expression is invalid. 

1608 lsst.daf.butler.registry.UserExpressionError 

1609 Raised when ``where`` expression is invalid. 
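
        Examples
        --------
        A minimal sketch; the instrument name is hypothetical, and the fields
        available on each record depend on the element's schema::

            for record in registry.queryDimensionRecords(
                "detector",
                where="instrument = inst",
                bind={"inst": "HSC"},
            ):
                print(record.id, record.full_name)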

1610 """ 

1611 raise NotImplementedError() 

1612 

1613 @abstractmethod 

1614 def queryDatasetAssociations( 

1615 self, 

1616 datasetType: str | DatasetType, 

1617 collections: CollectionArgType | None = ..., 

1618 *, 

1619 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1620 flattenChains: bool = False, 

1621 ) -> Iterator[DatasetAssociation]: 

1622 """Iterate over dataset-collection combinations where the dataset is in 

1623 the collection. 

1624 

1625 This method is a temporary placeholder for better support for 

1626 association results in `queryDatasets`. It will probably be 

1627 removed in the future, and should be avoided in production code 

1628 whenever possible. 

1629 

1630 Parameters 

1631 ---------- 

1632 datasetType : `DatasetType` or `str` 

1633 A dataset type object or the name of one. 

1634 collections : collection expression, optional 

1635 An expression that identifies the collections to search for 

1636 datasets, such as a `str` (for full matches or partial matches 

1637 via globs), `re.Pattern` (for partial matches), or iterable 

1638 thereof. ``...`` can be used to search all collections (actually 

1639 just all `~CollectionType.RUN` collections, because this will still 

1640 find all datasets). If not provided, ``self.defaults.collections`` 

1641 is used. See :ref:`daf_butler_collection_expressions` for more 

1642 information. 

1643 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

1644 If provided, only yield associations from collections of these 

1645 types. 

1646 flattenChains : `bool`, optional 

1647 If `True`, search in the children of 

1648 `~CollectionType.CHAINED` collections. If `False` (default), 

1649 ``CHAINED`` collections are ignored. 

1650 

1651 Yields 

1652 ------ 

1653 association : `.DatasetAssociation` 

1654 Object representing the relationship between a single dataset and 

1655 a single collection. 

1656 

1657 Raises 

1658 ------ 

1659 lsst.daf.butler.registry.NoDefaultCollectionError 

1660 Raised if ``collections`` is `None` and 

1661 ``self.defaults.collections`` is `None`. 

1662 lsst.daf.butler.registry.CollectionExpressionError 

1663 Raised when ``collections`` expression is invalid. 
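
        Examples
        --------
        A minimal sketch, with a hypothetical dataset type name::

            from lsst.daf.butler import CollectionType

            for assoc in registry.queryDatasetAssociations(
                "calexp",
                collectionTypes={CollectionType.TAGGED},
            ):
                # Each association pairs one dataset (assoc.ref) with one
                # collection (assoc.collection).
                print(assoc.ref, assoc.collection)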

1664 """ 

1665 raise NotImplementedError() 

1666 

1667 @property 

1668 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1669 """ObsCore manager instance for this registry 

1670 (`~.interfaces.ObsCoreTableManager` 

1671 or `None`). 

1672 

1673 The ObsCore manager may not be implemented for all registry backends, 

1674 and may not be enabled for many repositories. 
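
        Because the manager may be absent, callers should check for `None`
        before use; a minimal sketch::

            manager = registry.obsCoreTableManager
            if manager is not None:
                ...  # use ObsCore-specific functionality here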

1675 """ 

1676 return None 

1677 

1678 storageClasses: StorageClassFactory 

1679 """All storage classes known to the registry (`StorageClassFactory`). 

1680 """ 

1681 

1682 datasetIdFactory: DatasetIdFactory 

1683 """Factory for dataset IDs."""