# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
import re
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType
from lsst.utils.ellipsis import Ellipsis, EllipsisType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)

# Type alias for `collections` arguments.
CollectionArgType = str | re.Pattern | Iterable[str | re.Pattern] | EllipsisType | CollectionWildcard


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed to be shared between
    implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Cannot instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling it.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it cannot create a registry.
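
        Examples
        --------
        A minimal sketch of creating a new registry database with default
        configurations (the repository path is illustrative, and the backing
        database must be empty before the call):

        >>> from lsst.daf.butler.registry import Registry
        >>> registry = Registry.createFromConfig(butlerRoot="/path/to/repo")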
194 """
195 registry_cls, registry_config = cls.determineTrampoline(config)
196 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default), create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
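
        Examples
        --------
        A minimal usage sketch (the path is illustrative; any expression
        accepted as a configuration would work):

        >>> from lsst.daf.butler.registry import Registry
        >>> registry = Registry.fromConfig("/path/to/repo", writeable=False)
        >>> registry.isWriteable()
        False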
231 """
232 # The base class implementation should trampoline to the correct
233 # subclass. No implementation should ever use this implementation
234 # directly. If no class is specified, default to the standard
235 # registry.
236 registry_cls, registry_config = cls.determineTrampoline(config)
237 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and `False`
        otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but with independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
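
        Examples
        --------
        A sketch of replacing the defaults wholesale (the collection and run
        names are illustrative):

        >>> from lsst.daf.butler.registry import RegistryDefaults
        >>> registry.defaults = RegistryDefaults(
        ...     collections=["HSC/defaults"], run="u/someone/run"
        ... )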
288 """
289 return self._defaults
291 @defaults.setter
292 def defaults(self, value: RegistryDefaults) -> None:
293 if value.run is not None:
294 self.registerRun(value.run)
295 value.finish(self)
296 self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction.
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call, `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
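
        Examples
        --------
        A sketch of registering a tagged collection (the name and docstring
        are illustrative):

        >>> registry.registerCollection(
        ...     "u/someone/tagged", CollectionType.TAGGED, doc="Example."
        ... )
        True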
350 """
351 raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it will be removed from the `Registry` database. This requires
        that those datasets be removed (or at least trashed) from any
        datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when the
            given chained collection is searched.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
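
        Examples
        --------
        A sketch of defining a chained collection whose search order is the
        two child collections given (all names are illustrative):

        >>> registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
        True
        >>> registry.setCollectionChain("HSC/defaults", ["HSC/raw/all", "HSC/calib"])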
502 """
503 raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace any
            existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
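
        Examples
        --------
        A sketch of defining and registering a dataset type (the name,
        dimensions, and storage class are illustrative):

        >>> from lsst.daf.butler import DatasetType
        >>> dtype = DatasetType(
        ...     "calexp",
        ...     dimensions=["instrument", "visit", "detector"],
        ...     storageClass="ExposureF",
        ...     universe=registry.dimensions,
        ... )
        >>> registry.registerDatasetType(dtype)
        True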
601 """
602 raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result in
            unexpected behavior from other butler processes that are active
            and have not seen the deletion.

        Parameters
        ----------
        name : `str` or `tuple` [ `str`, ... ]
            Name of the dataset type to be removed, or a tuple of such names.
            Wildcards are allowed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type definition
            when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though most
        other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and cannot
        be found using the provided dataset type, a dataset ref for the parent
        will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one. If this is a `DatasetType`
            instance, its storage class will be respected and propagated to
            the output, even if it differs from the dataset type definition
            in the registry, as long as the storage classes are convertible.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        MissingDatasetTypeError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception even
        when the set of collections searched is intrinsically incompatible
        with the dataset type, e.g. if ``datasetType.isCalibration() is
        False``, but only `~CollectionType.CALIBRATION` collections are being
        searched. This may make it harder to debug some lookup failures, but
        the behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and that
        adding additional collections that do not contain a match to the
        search path never changes the behavior.

        This method handles component dataset types automatically, though most
        other registry operations do not.
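
        Examples
        --------
        A sketch of looking up a single dataset (the dataset type, data ID
        values, and collection name are illustrative):

        >>> ref = registry.findDataset(
        ...     "calexp",
        ...     instrument="HSC",
        ...     visit=903334,
        ...     detector=16,
        ...     collections=["HSC/runs/RC2"],
        ... )
        >>> ref is None  # `None` means no match was found.
        False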
750 """
751 raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies the option for generating dataset IDs. By default
            unique IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to the registry.
        CollectionTypeError
            Raised if the ``run`` collection type is not
            `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
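
        Examples
        --------
        A sketch of registering two new raw datasets in a run (the run name
        and data ID values are illustrative):

        >>> registry.registerRun("HSC/raw/example")
        True
        >>> refs = registry.insertDatasets(
        ...     "raw",
        ...     dataIds=[
        ...         {"instrument": "HSC", "exposure": 903334, "detector": 16},
        ...         {"instrument": "HSC", "exposure": 903334, "detector": 17},
        ...     ],
        ...     run="HSC/raw/example",
        ... )
        >>> len(refs)
        2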
804 """
805 raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        This method differs from `insertDatasets` in that it accepts
        `DatasetRef` instances which should already be resolved and have a
        dataset ID. If the registry supports globally-unique dataset IDs
        (e.g. `uuid.UUID`) then datasets which already exist in the registry
        will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if the datasets correspond to more than one dataset type
            or the dataset type is not known to the registry.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not use
        this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections, and
        any `Quantum` that consumed this dataset will instead be marked with
        having a NULL input. `Datastore` records will *not* be deleted; the
        caller is responsible for ensuring that the dataset has already been
        removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType` and
        data ID but with a different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists in
            the given collection.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
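
        Examples
        --------
        A sketch of tagging previously-inserted datasets into a tagged
        collection (the name is illustrative, and ``refs`` is assumed to
        already hold resolved `DatasetRef` instances):

        >>> registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
        True
        >>> registry.associate("u/someone/tagged", refs)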
936 """
937 raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED` collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
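
        Examples
        --------
        A sketch of certifying calibration datasets for a validity range (the
        collection name and dates are illustrative, and ``bias_refs`` is
        assumed to hold resolved `DatasetRef` instances):

        >>> import astropy.time
        >>> from lsst.daf.butler import Timespan
        >>> span = Timespan(
        ...     astropy.time.Time("2023-01-01", scale="tai"),
        ...     astropy.time.Time("2023-06-01", scale="tai"),
        ... )
        >>> registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
        True
        >>> registry.certify("HSC/calib", bias_refs, span)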
993 """
994 raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [ `str`, `DimensionRecord` ], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.

        Notes
        -----
        This method cannot be relied upon to reject invalid data ID values
        for dimensions that do not actually have any record columns. For
        efficiency reasons the records for these dimensions (which have only
        dimension key values that are given by the caller) may be constructed
        directly rather than obtained from the registry database.
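
        Examples
        --------
        A sketch of expanding a plain data ID so that dimension records are
        attached (the data ID values are illustrative):

        >>> expanded = registry.expandDataId(
        ...     instrument="HSC", visit=903334, detector=16
        ... )
        >>> expanded.hasRecords() and expanded.hasFull()
        True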
1123 """
1124 raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default), perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` contains one or more `DimensionRecord`
            instances of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used when
            this is a concern.
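
        Examples
        --------
        A sketch of inserting an ``instrument`` dimension record as a plain
        mapping (the field values are illustrative; the exact required fields
        depend on the dimension universe in use):

        >>> registry.insertDimensionData(
        ...     "instrument",
        ...     {"name": "HSC", "detector_max": 200,
        ...      "class_name": "lsst.obs.subaru.HyperSuprimeCam"},
        ... )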
1158 """
1159 raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database, inserting
        it if it does not already exist and comparing values if it does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default), perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset types
            to return, such as a `str`, `re.Pattern`, or iterable thereof.
            ``...`` can be used to return all dataset types, and is the
            default. See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Returns
        -------
        dataset_types : `Iterable` [ `DatasetType` ]
            An `Iterable` of `DatasetType` instances whose names match
            ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
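
        Examples
        --------
        A sketch of listing every dataset type whose name starts with
        ``"calexp"`` (the pattern is illustrative):

        >>> import re
        >>> for dtype in registry.queryDatasetTypes(re.compile(r"^calexp")):
        ...     print(dtype.name)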
1248 """
1249 raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Returns
        -------
        collections : `Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which the two lists appear is unspecified, and
        the lists of children may be incomplete if a child has multiple
        parents.
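
        Examples
        --------
        A sketch of listing all `~CollectionType.RUN` collections whose names
        start with ``"HSC/"`` (the glob is illustrative):

        >>> for name in registry.queryCollections(
        ...     "HSC/*", collectionTypes=CollectionType.RUN
        ... ):
        ...     print(name)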
1303 """
1304 raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset types
            to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...`` can
            be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as a
            `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.defaults.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested data
            IDs are guaranteed to include values for all implied dimensions
            (i.e. `DataCoordinate.hasFull` will return `True`), but will not
            include dimension records (`DataCoordinate.hasRecords` will be
            `False`) unless `~queries.DatasetQueryResults.expanded` is called
            on the result object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when the ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
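
        Examples
        --------
        A sketch of a find-first query with a ``where`` constraint (dataset
        type, collection, and ``bind`` values are illustrative):

        >>> refs = registry.queryDatasets(
        ...     "calexp",
        ...     collections=["HSC/runs/RC2"],
        ...     where="instrument = 'HSC' AND visit = my_visit",
        ...     bind={"my_visit": 903334},
        ...     findFirst=True,
        ... )
        >>> for ref in refs:
        ...     print(ref.dataId)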
1422 """
1423 raise NotImplementedError()

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example, including
            "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to only
            those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            and iterables thereof. Regular expression objects (i.e.
            `re.Pattern`) are deprecated and will be removed after the v26
            release. See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `queries.DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are guaranteed
            to identify all dimensions (`DataCoordinate.hasFull` returns
            `True`), but will not contain `DimensionRecord` objects
            (`DataCoordinate.hasRecords` returns `False`). Call
            `DataCoordinateQueryResults.expanded` on the returned object to
            fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
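
        Examples
        --------
        A sketch of querying for visit/detector data IDs constrained by the
        existence of a dataset (all names and values are illustrative):

        >>> dataIds = registry.queryDataIds(
        ...     ["visit", "detector"],
        ...     datasets="calexp",
        ...     collections=["HSC/runs/RC2"],
        ...     instrument="HSC",
        ... )
        >>> for dataId in dataIds:
        ...     print(dataId["visit"], dataId["detector"])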
1528 """
1529 raise NotImplementedError()

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds` and
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default), check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `queries.DimensionRecordQueryResults`
            Data IDs matching the given query parameters.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when the ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when the ``where`` expression is invalid.
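
        Examples
        --------
        A sketch of fetching the records for all exposures of one instrument
        taken with a particular filter (the values are illustrative):

        >>> records = registry.queryDimensionRecords(
        ...     "exposure",
        ...     where="instrument = 'HSC' AND physical_filter = 'HSC-R'",
        ... )
        >>> for record in records:
        ...     print(record.id, record.timespan)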
1612 """
1613 raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: CollectionArgType | None = Ellipsis,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `.DatasetAssociation`
            Object representing the relationship between a single dataset and
            a single collection.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when the ``collections`` expression is invalid.
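
        Examples
        --------
        A sketch of listing every collection that holds a given dataset type
        (the dataset type name is illustrative):

        >>> for assoc in registry.queryDatasetAssociations("bias"):
        ...     print(assoc.collection, assoc.ref.dataId)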
1666 """
1667 raise NotImplementedError()

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        """ObsCore manager instance for this registry (`ObsCoreTableManager`
        or `None`).

        The ObsCore manager may not be implemented for all registry backends,
        or may not be enabled for a particular repository.
        """
        return None

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`)."""

    datasetIdFactory: DatasetIdFactory
    """Factory for dataset IDs."""