Coverage for python/lsst/daf/butler/registry/_registry.py: 61%
184 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-08 10:28 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-08 10:28 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Registry",)
26import contextlib
27import logging
28from abc import ABC, abstractmethod
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Mapping,
37 Optional,
38 Sequence,
39 Set,
40 Tuple,
41 Type,
42 Union,
43)
45from lsst.resources import ResourcePathExpression
46from lsst.utils import doImportType
48from ..core import (
49 Config,
50 DataCoordinate,
51 DataId,
52 DatasetAssociation,
53 DatasetId,
54 DatasetRef,
55 DatasetType,
56 Dimension,
57 DimensionConfig,
58 DimensionElement,
59 DimensionGraph,
60 DimensionRecord,
61 DimensionUniverse,
62 NameLookupMapping,
63 StorageClassFactory,
64 Timespan,
65)
66from ._collection_summary import CollectionSummary
67from ._collectionType import CollectionType
68from ._config import RegistryConfig
69from ._defaults import RegistryDefaults
70from .interfaces import DatasetIdFactory, DatasetIdGenEnum
71from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
73if TYPE_CHECKING: 73 ↛ 74line 73 didn't jump to line 74, because the condition on line 73 was never true
74 from .._butlerConfig import ButlerConfig
75 from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager
77_LOG = logging.getLogger(__name__)
80class Registry(ABC):
81 """Abstract Registry interface.
83 Each registry implementation can have its own constructor parameters.
84 The assumption is that an instance of a specific subclass will be
85 constructed from configuration using `Registry.fromConfig()`.
86 The base class will look for a ``cls`` entry and call that specific
87 `fromConfig()` method.
89 All subclasses should store `RegistryDefaults` in a ``_defaults``
90 property. No other properties are assumed shared between implementations.
91 """
93 defaultConfigFile: Optional[str] = None
94 """Path to configuration defaults. Accessed within the ``configs`` resource
95 or relative to a search path. Can be None if no defaults specified.
96 """
98 @classmethod
99 def forceRegistryConfig(
100 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
101 ) -> RegistryConfig:
102 """Force the supplied config to a `RegistryConfig`.
104 Parameters
105 ----------
106 config : `RegistryConfig`, `Config` or `str` or `None`
107 Registry configuration, if missing then default configuration will
108 be loaded from registry.yaml.
110 Returns
111 -------
112 registry_config : `RegistryConfig`
113 A registry config.
114 """
115 if not isinstance(config, RegistryConfig):
116 if isinstance(config, (str, Config)) or config is None:
117 config = RegistryConfig(config)
118 else:
119 raise ValueError(f"Incompatible Registry configuration: {config}")
120 return config
122 @classmethod
123 def determineTrampoline(
124 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
125 ) -> Tuple[Type[Registry], RegistryConfig]:
126 """Return class to use to instantiate real registry.
128 Parameters
129 ----------
130 config : `RegistryConfig` or `str`, optional
131 Registry configuration, if missing then default configuration will
132 be loaded from registry.yaml.
134 Returns
135 -------
136 requested_cls : `type` of `Registry`
137 The real registry class to use.
138 registry_config : `RegistryConfig`
139 The `RegistryConfig` to use.
140 """
141 config = cls.forceRegistryConfig(config)
143 # Default to the standard registry
144 registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
145 registry_cls = doImportType(registry_cls_name)
146 if registry_cls is cls:
147 raise ValueError("Can not instantiate the abstract base Registry from config")
148 if not issubclass(registry_cls, Registry):
149 raise TypeError(
150 f"Registry class obtained from config {registry_cls_name} is not a Registry class."
151 )
152 return registry_cls, config
154 @classmethod
155 def createFromConfig(
156 cls,
157 config: Optional[Union[RegistryConfig, str]] = None,
158 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
159 butlerRoot: Optional[ResourcePathExpression] = None,
160 ) -> Registry:
161 """Create registry database and return `Registry` instance.
163 This method initializes database contents, database must be empty
164 prior to calling this method.
166 Parameters
167 ----------
168 config : `RegistryConfig` or `str`, optional
169 Registry configuration, if missing then default configuration will
170 be loaded from registry.yaml.
171 dimensionConfig : `DimensionConfig` or `str`, optional
172 Dimensions configuration, if missing then default configuration
173 will be loaded from dimensions.yaml.
174 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
175 Path to the repository root this `Registry` will manage.
177 Returns
178 -------
179 registry : `Registry`
180 A new `Registry` instance.
182 Notes
183 -----
184 This class will determine the concrete `Registry` subclass to
185 use from configuration. Each subclass should implement this method
186 even if it can not create a registry.
187 """
188 registry_cls, registry_config = cls.determineTrampoline(config)
189 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)
191 @classmethod
192 def fromConfig(
193 cls,
194 config: Union[ButlerConfig, RegistryConfig, Config, str],
195 butlerRoot: Optional[ResourcePathExpression] = None,
196 writeable: bool = True,
197 defaults: Optional[RegistryDefaults] = None,
198 ) -> Registry:
199 """Create `Registry` subclass instance from `config`.
201 Registry database must be initialized prior to calling this method.
203 Parameters
204 ----------
205 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
206 Registry configuration
207 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
208 Path to the repository root this `Registry` will manage.
209 writeable : `bool`, optional
210 If `True` (default) create a read-write connection to the database.
211 defaults : `RegistryDefaults`, optional
212 Default collection search path and/or output `~CollectionType.RUN`
213 collection.
215 Returns
216 -------
217 registry : `Registry` (subclass)
218 A new `Registry` subclass instance.
220 Notes
221 -----
222 This class will determine the concrete `Registry` subclass to
223 use from configuration. Each subclass should implement this method.
224 """
225 # The base class implementation should trampoline to the correct
226 # subclass. No implementation should ever use this implementation
227 # directly. If no class is specified, default to the standard
228 # registry.
229 registry_cls, registry_config = cls.determineTrampoline(config)
230 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
232 @abstractmethod
233 def isWriteable(self) -> bool:
234 """Return `True` if this registry allows write operations, and `False`
235 otherwise.
236 """
237 raise NotImplementedError()
239 @abstractmethod
240 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
241 """Create a new `Registry` backed by the same data repository and
242 connection as this one, but independent defaults.
244 Parameters
245 ----------
246 defaults : `RegistryDefaults`, optional
247 Default collections and data ID values for the new registry. If
248 not provided, ``self.defaults`` will be used (but future changes
249 to either registry's defaults will not affect the other).
251 Returns
252 -------
253 copy : `Registry`
254 A new `Registry` instance with its own defaults.
256 Notes
257 -----
258 Because the new registry shares a connection with the original, they
259 also share transaction state (despite the fact that their `transaction`
260 context manager methods do not reflect this), and must be used with
261 care.
262 """
263 raise NotImplementedError()
265 @property
266 @abstractmethod
267 def dimensions(self) -> DimensionUniverse:
268 """Definitions of all dimensions recognized by this `Registry`
269 (`DimensionUniverse`).
270 """
271 raise NotImplementedError()
273 @property
274 def defaults(self) -> RegistryDefaults:
275 """Default collection search path and/or output `~CollectionType.RUN`
276 collection (`RegistryDefaults`).
278 This is an immutable struct whose components may not be set
279 individually, but the entire struct can be set by assigning to this
280 property.
281 """
282 return self._defaults
284 @defaults.setter
285 def defaults(self, value: RegistryDefaults) -> None:
286 if value.run is not None:
287 self.registerRun(value.run)
288 value.finish(self)
289 self._defaults = value
291 @abstractmethod
292 def refresh(self) -> None:
293 """Refresh all in-memory state by querying the database.
295 This may be necessary to enable querying for entities added by other
296 registry instances after this one was constructed.
297 """
298 raise NotImplementedError()
300 @contextlib.contextmanager
301 @abstractmethod
302 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
303 """Return a context manager that represents a transaction."""
304 raise NotImplementedError()
306 def resetConnectionPool(self) -> None:
307 """Reset connection pool for registry if relevant.
309 This operation can be used reset connections to servers when
310 using registry with fork-based multiprocessing. This method should
311 usually be called by the child process immediately
312 after the fork.
314 The base class implementation is a no-op.
315 """
316 pass
318 @abstractmethod
319 def registerCollection(
320 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
321 ) -> bool:
322 """Add a new collection if one with the given name does not exist.
324 Parameters
325 ----------
326 name : `str`
327 The name of the collection to create.
328 type : `CollectionType`
329 Enum value indicating the type of collection to create.
330 doc : `str`, optional
331 Documentation string for the collection.
333 Returns
334 -------
335 registered : `bool`
336 Boolean indicating whether the collection was already registered
337 or was created by this call.
339 Notes
340 -----
341 This method cannot be called within transactions, as it needs to be
342 able to perform its own transaction to be concurrent.
343 """
344 raise NotImplementedError()
346 @abstractmethod
347 def getCollectionType(self, name: str) -> CollectionType:
348 """Return an enumeration value indicating the type of the given
349 collection.
351 Parameters
352 ----------
353 name : `str`
354 The name of the collection.
356 Returns
357 -------
358 type : `CollectionType`
359 Enum value indicating the type of this collection.
361 Raises
362 ------
363 MissingCollectionError
364 Raised if no collection with the given name exists.
365 """
366 raise NotImplementedError()
368 @abstractmethod
369 def _get_collection_record(self, name: str) -> CollectionRecord:
370 """Return the record for this collection.
372 Parameters
373 ----------
374 name : `str`
375 Name of the collection for which the record is to be retrieved.
377 Returns
378 -------
379 record : `CollectionRecord`
380 The record for this collection.
381 """
382 raise NotImplementedError()
384 @abstractmethod
385 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
386 """Add a new run if one with the given name does not exist.
388 Parameters
389 ----------
390 name : `str`
391 The name of the run to create.
392 doc : `str`, optional
393 Documentation string for the collection.
395 Returns
396 -------
397 registered : `bool`
398 Boolean indicating whether a new run was registered. `False`
399 if it already existed.
401 Notes
402 -----
403 This method cannot be called within transactions, as it needs to be
404 able to perform its own transaction to be concurrent.
405 """
406 raise NotImplementedError()
408 @abstractmethod
409 def removeCollection(self, name: str) -> None:
410 """Remove the given collection from the registry.
412 Parameters
413 ----------
414 name : `str`
415 The name of the collection to remove.
417 Raises
418 ------
419 MissingCollectionError
420 Raised if no collection with the given name exists.
421 sqlalchemy.IntegrityError
422 Raised if the database rows associated with the collection are
423 still referenced by some other table, such as a dataset in a
424 datastore (for `~CollectionType.RUN` collections only) or a
425 `~CollectionType.CHAINED` collection of which this collection is
426 a child.
428 Notes
429 -----
430 If this is a `~CollectionType.RUN` collection, all datasets and quanta
431 in it will removed from the `Registry` database. This requires that
432 those datasets be removed (or at least trashed) from any datastores
433 that hold them first.
435 A collection may not be deleted as long as it is referenced by a
436 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
437 be deleted or redefined first.
438 """
439 raise NotImplementedError()
441 @abstractmethod
442 def getCollectionChain(self, parent: str) -> Sequence[str]:
443 """Return the child collections in a `~CollectionType.CHAINED`
444 collection.
446 Parameters
447 ----------
448 parent : `str`
449 Name of the chained collection. Must have already been added via
450 a call to `Registry.registerCollection`.
452 Returns
453 -------
454 children : `Sequence` [ `str` ]
455 An ordered sequence of collection names that are searched when the
456 given chained collection is searched.
458 Raises
459 ------
460 MissingCollectionError
461 Raised if ``parent`` does not exist in the `Registry`.
462 CollectionTypeError
463 Raised if ``parent`` does not correspond to a
464 `~CollectionType.CHAINED` collection.
465 """
466 raise NotImplementedError()
468 @abstractmethod
469 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
470 """Define or redefine a `~CollectionType.CHAINED` collection.
472 Parameters
473 ----------
474 parent : `str`
475 Name of the chained collection. Must have already been added via
476 a call to `Registry.registerCollection`.
477 children : `Any`
478 An expression defining an ordered search of child collections,
479 generally an iterable of `str`; see
480 :ref:`daf_butler_collection_expressions` for more information.
481 flatten : `bool`, optional
482 If `True` (`False` is default), recursively flatten out any nested
483 `~CollectionType.CHAINED` collections in ``children`` first.
485 Raises
486 ------
487 MissingCollectionError
488 Raised when any of the given collections do not exist in the
489 `Registry`.
490 CollectionTypeError
491 Raised if ``parent`` does not correspond to a
492 `~CollectionType.CHAINED` collection.
493 ValueError
494 Raised if the given collections contains a cycle.
495 """
496 raise NotImplementedError()
498 @abstractmethod
499 def getCollectionParentChains(self, collection: str) -> Set[str]:
500 """Return the CHAINED collections that directly contain the given one.
502 Parameters
503 ----------
504 name : `str`
505 Name of the collection.
507 Returns
508 -------
509 chains : `set` of `str`
510 Set of `~CollectionType.CHAINED` collection names.
511 """
512 raise NotImplementedError()
514 @abstractmethod
515 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
516 """Retrieve the documentation string for a collection.
518 Parameters
519 ----------
520 name : `str`
521 Name of the collection.
523 Returns
524 -------
525 docs : `str` or `None`
526 Docstring for the collection with the given name.
527 """
528 raise NotImplementedError()
530 @abstractmethod
531 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
532 """Set the documentation string for a collection.
534 Parameters
535 ----------
536 name : `str`
537 Name of the collection.
538 docs : `str` or `None`
539 Docstring for the collection with the given name; will replace any
540 existing docstring. Passing `None` will remove any existing
541 docstring.
542 """
543 raise NotImplementedError()
545 @abstractmethod
546 def getCollectionSummary(self, collection: str) -> CollectionSummary:
547 """Return a summary for the given collection.
549 Parameters
550 ----------
551 collection : `str`
552 Name of the collection for which a summary is to be retrieved.
554 Returns
555 -------
556 summary : `CollectionSummary`
557 Summary of the dataset types and governor dimension values in
558 this collection.
559 """
560 raise NotImplementedError()
562 @abstractmethod
563 def registerDatasetType(self, datasetType: DatasetType) -> bool:
564 """
565 Add a new `DatasetType` to the Registry.
567 It is not an error to register the same `DatasetType` twice.
569 Parameters
570 ----------
571 datasetType : `DatasetType`
572 The `DatasetType` to be added.
574 Returns
575 -------
576 inserted : `bool`
577 `True` if ``datasetType`` was inserted, `False` if an identical
578 existing `DatsetType` was found. Note that in either case the
579 DatasetType is guaranteed to be defined in the Registry
580 consistently with the given definition.
582 Raises
583 ------
584 ValueError
585 Raised if the dimensions or storage class are invalid.
586 ConflictingDefinitionError
587 Raised if this DatasetType is already registered with a different
588 definition.
590 Notes
591 -----
592 This method cannot be called within transactions, as it needs to be
593 able to perform its own transaction to be concurrent.
594 """
595 raise NotImplementedError()
597 @abstractmethod
598 def removeDatasetType(self, name: str) -> None:
599 """Remove the named `DatasetType` from the registry.
601 .. warning::
603 Registry implementations can cache the dataset type definitions.
604 This means that deleting the dataset type definition may result in
605 unexpected behavior from other butler processes that are active
606 that have not seen the deletion.
608 Parameters
609 ----------
610 name : `str`
611 Name of the type to be removed.
613 Raises
614 ------
615 lsst.daf.butler.registry.OrphanedRecordError
616 Raised if an attempt is made to remove the dataset type definition
617 when there are already datasets associated with it.
619 Notes
620 -----
621 If the dataset type is not registered the method will return without
622 action.
623 """
624 raise NotImplementedError()
626 @abstractmethod
627 def getDatasetType(self, name: str) -> DatasetType:
628 """Get the `DatasetType`.
630 Parameters
631 ----------
632 name : `str`
633 Name of the type.
635 Returns
636 -------
637 type : `DatasetType`
638 The `DatasetType` associated with the given name.
640 Raises
641 ------
642 MissingDatasetTypeError
643 Raised if the requested dataset type has not been registered.
645 Notes
646 -----
647 This method handles component dataset types automatically, though most
648 other registry operations do not.
649 """
650 raise NotImplementedError()
652 @abstractmethod
653 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
654 """Test whether the given dataset ID generation mode is supported by
655 `insertDatasets`.
657 Parameters
658 ----------
659 mode : `DatasetIdGenEnum`
660 Enum value for the mode to test.
662 Returns
663 -------
664 supported : `bool`
665 Whether the given mode is supported.
666 """
667 raise NotImplementedError()
669 @abstractmethod
670 def findDataset(
671 self,
672 datasetType: Union[DatasetType, str],
673 dataId: Optional[DataId] = None,
674 *,
675 collections: Any = None,
676 timespan: Optional[Timespan] = None,
677 **kwargs: Any,
678 ) -> Optional[DatasetRef]:
679 """Find a dataset given its `DatasetType` and data ID.
681 This can be used to obtain a `DatasetRef` that permits the dataset to
682 be read from a `Datastore`. If the dataset is a component and can not
683 be found using the provided dataset type, a dataset ref for the parent
684 will be returned instead but with the correct dataset type.
686 Parameters
687 ----------
688 datasetType : `DatasetType` or `str`
689 A `DatasetType` or the name of one. If this is a `DatasetType`
690 instance, its storage class will be respected and propagated to
691 the output, even if it differs from the dataset type definition
692 in the registry, as long as the storage classes are convertible.
693 dataId : `dict` or `DataCoordinate`, optional
694 A `dict`-like object containing the `Dimension` links that identify
695 the dataset within a collection.
696 collections, optional.
697 An expression that fully or partially identifies the collections to
698 search for the dataset; see
699 :ref:`daf_butler_collection_expressions` for more information.
700 Defaults to ``self.defaults.collections``.
701 timespan : `Timespan`, optional
702 A timespan that the validity range of the dataset must overlap.
703 If not provided, any `~CollectionType.CALIBRATION` collections
704 matched by the ``collections`` argument will not be searched.
705 **kwargs
706 Additional keyword arguments passed to
707 `DataCoordinate.standardize` to convert ``dataId`` to a true
708 `DataCoordinate` or augment an existing one.
710 Returns
711 -------
712 ref : `DatasetRef`
713 A reference to the dataset, or `None` if no matching Dataset
714 was found.
716 Raises
717 ------
718 NoDefaultCollectionError
719 Raised if ``collections`` is `None` and
720 ``self.defaults.collections`` is `None`.
721 LookupError
722 Raised if one or more data ID keys are missing.
723 MissingDatasetTypeError
724 Raised if the dataset type does not exist.
725 MissingCollectionError
726 Raised if any of ``collections`` does not exist in the registry.
728 Notes
729 -----
730 This method simply returns `None` and does not raise an exception even
731 when the set of collections searched is intrinsically incompatible with
732 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
733 only `~CollectionType.CALIBRATION` collections are being searched.
734 This may make it harder to debug some lookup failures, but the behavior
735 is intentional; we consider it more important that failed searches are
736 reported consistently, regardless of the reason, and that adding
737 additional collections that do not contain a match to the search path
738 never changes the behavior.
740 This method handles component dataset types automatically, though most
741 other registry operations do not.
742 """
743 raise NotImplementedError()
745 @abstractmethod
746 def insertDatasets(
747 self,
748 datasetType: Union[DatasetType, str],
749 dataIds: Iterable[DataId],
750 run: Optional[str] = None,
751 expand: bool = True,
752 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
753 ) -> List[DatasetRef]:
754 """Insert one or more datasets into the `Registry`
756 This always adds new datasets; to associate existing datasets with
757 a new collection, use ``associate``.
759 Parameters
760 ----------
761 datasetType : `DatasetType` or `str`
762 A `DatasetType` or the name of one.
763 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
764 Dimension-based identifiers for the new datasets.
765 run : `str`, optional
766 The name of the run that produced the datasets. Defaults to
767 ``self.defaults.run``.
768 expand : `bool`, optional
769 If `True` (default), expand data IDs as they are inserted. This is
770 necessary in general to allow datastore to generate file templates,
771 but it may be disabled if the caller can guarantee this is
772 unnecessary.
773 idGenerationMode : `DatasetIdGenEnum`, optional
774 Specifies option for generating dataset IDs. By default unique IDs
775 are generated for each inserted dataset.
777 Returns
778 -------
779 refs : `list` of `DatasetRef`
780 Resolved `DatasetRef` instances for all given data IDs (in the same
781 order).
783 Raises
784 ------
785 DatasetTypeError
786 Raised if ``datasetType`` is not known to registry.
787 CollectionTypeError
788 Raised if ``run`` collection type is not `~CollectionType.RUN`.
789 NoDefaultCollectionError
790 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
791 ConflictingDefinitionError
792 If a dataset with the same dataset type and data ID as one of those
793 given already exists in ``run``.
794 MissingCollectionError
795 Raised if ``run`` does not exist in the registry.
796 """
797 raise NotImplementedError()
799 @abstractmethod
800 def _importDatasets(
801 self,
802 datasets: Iterable[DatasetRef],
803 expand: bool = True,
804 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
805 reuseIds: bool = False,
806 ) -> List[DatasetRef]:
807 """Import one or more datasets into the `Registry`.
809 Difference from `insertDatasets` method is that this method accepts
810 `DatasetRef` instances which should already be resolved and have a
811 dataset ID. If registry supports globally-unique dataset IDs (e.g.
812 `uuid.UUID`) then datasets which already exist in the registry will be
813 ignored if imported again.
815 Parameters
816 ----------
817 datasets : `~collections.abc.Iterable` of `DatasetRef`
818 Datasets to be inserted. All `DatasetRef` instances must have
819 identical ``datasetType`` and ``run`` attributes. ``run``
820 attribute can be `None` and defaults to ``self.defaults.run``.
821 Datasets can specify ``id`` attribute which will be used for
822 inserted datasets. All dataset IDs must have the same type
823 (`int` or `uuid.UUID`), if type of dataset IDs does not match
824 configured backend then IDs will be ignored and new IDs will be
825 generated by backend.
826 expand : `bool`, optional
827 If `True` (default), expand data IDs as they are inserted. This is
828 necessary in general to allow datastore to generate file templates,
829 but it may be disabled if the caller can guarantee this is
830 unnecessary.
831 idGenerationMode : `DatasetIdGenEnum`, optional
832 Specifies option for generating dataset IDs when IDs are not
833 provided or their type does not match backend type. By default
834 unique IDs are generated for each inserted dataset.
835 reuseIds : `bool`, optional
836 If `True` then forces re-use of imported dataset IDs for integer
837 IDs which are normally generated as auto-incremented; exception
838 will be raised if imported IDs clash with existing ones. This
839 option has no effect on the use of globally-unique IDs which are
840 always re-used (or generated if integer IDs are being imported).
842 Returns
843 -------
844 refs : `list` of `DatasetRef`
845 Resolved `DatasetRef` instances for all given data IDs (in the same
846 order). If any of ``datasets`` has an ID which already exists in
847 the database then it will not be inserted or updated, but a
848 resolved `DatasetRef` will be returned for it in any case.
850 Raises
851 ------
852 NoDefaultCollectionError
853 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
854 DatasetTypeError
855 Raised if datasets correspond to more than one dataset type or
856 dataset type is not known to registry.
857 ConflictingDefinitionError
858 If a dataset with the same dataset type and data ID as one of those
859 given already exists in ``run``.
860 MissingCollectionError
861 Raised if ``run`` does not exist in the registry.
863 Notes
864 -----
865 This method is considered package-private and internal to Butler
866 implementation. Clients outside daf_butler package should not use this
867 method.
868 """
869 raise NotImplementedError()
871 @abstractmethod
872 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
873 """Retrieve a Dataset entry.
875 Parameters
876 ----------
877 id : `DatasetId`
878 The unique identifier for the dataset.
880 Returns
881 -------
882 ref : `DatasetRef` or `None`
883 A ref to the Dataset, or `None` if no matching Dataset
884 was found.
885 """
886 raise NotImplementedError()
888 @abstractmethod
889 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
890 """Remove datasets from the Registry.
892 The datasets will be removed unconditionally from all collections, and
893 any `Quantum` that consumed this dataset will instead be marked with
894 having a NULL input. `Datastore` records will *not* be deleted; the
895 caller is responsible for ensuring that the dataset has already been
896 removed from all Datastores.
898 Parameters
899 ----------
900 refs : `Iterable` of `DatasetRef`
901 References to the datasets to be removed. Must include a valid
902 ``id`` attribute, and should be considered invalidated upon return.
904 Raises
905 ------
906 AmbiguousDatasetError
907 Raised if any ``ref.id`` is `None`.
908 OrphanedRecordError
909 Raised if any dataset is still present in any `Datastore`.
910 """
911 raise NotImplementedError()
913 @abstractmethod
914 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
915 """Add existing datasets to a `~CollectionType.TAGGED` collection.
917 If a DatasetRef with the same exact ID is already in a collection
918 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
919 data ID but with different ID exists in the collection,
920 `ConflictingDefinitionError` is raised.
922 Parameters
923 ----------
924 collection : `str`
925 Indicates the collection the datasets should be associated with.
926 refs : `Iterable` [ `DatasetRef` ]
927 An iterable of resolved `DatasetRef` instances that already exist
928 in this `Registry`.
930 Raises
931 ------
932 ConflictingDefinitionError
933 If a Dataset with the given `DatasetRef` already exists in the
934 given collection.
935 AmbiguousDatasetError
936 Raised if ``any(ref.id is None for ref in refs)``.
937 MissingCollectionError
938 Raised if ``collection`` does not exist in the registry.
939 CollectionTypeError
940 Raise adding new datasets to the given ``collection`` is not
941 allowed.
942 """
943 raise NotImplementedError()
945 @abstractmethod
946 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
947 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
949 ``collection`` and ``ref`` combinations that are not currently
950 associated are silently ignored.
952 Parameters
953 ----------
954 collection : `str`
955 The collection the datasets should no longer be associated with.
956 refs : `Iterable` [ `DatasetRef` ]
957 An iterable of resolved `DatasetRef` instances that already exist
958 in this `Registry`.
960 Raises
961 ------
962 AmbiguousDatasetError
963 Raised if any of the given dataset references is unresolved.
964 MissingCollectionError
965 Raised if ``collection`` does not exist in the registry.
966 CollectionTypeError
967 Raise adding new datasets to the given ``collection`` is not
968 allowed.
969 """
970 raise NotImplementedError()
972 @abstractmethod
973 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
974 """Associate one or more datasets with a calibration collection and a
975 validity range within it.
977 Parameters
978 ----------
979 collection : `str`
980 The name of an already-registered `~CollectionType.CALIBRATION`
981 collection.
982 refs : `Iterable` [ `DatasetRef` ]
983 Datasets to be associated.
984 timespan : `Timespan`
985 The validity range for these datasets within the collection.
987 Raises
988 ------
989 AmbiguousDatasetError
990 Raised if any of the given `DatasetRef` instances is unresolved.
991 ConflictingDefinitionError
992 Raised if the collection already contains a different dataset with
993 the same `DatasetType` and data ID and an overlapping validity
994 range.
995 CollectionTypeError
996 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
997 collection or if one or more datasets are of a dataset type for
998 which `DatasetType.isCalibration` returns `False`.
999 """
1000 raise NotImplementedError()
1002 @abstractmethod
1003 def decertify(
1004 self,
1005 collection: str,
1006 datasetType: Union[str, DatasetType],
1007 timespan: Timespan,
1008 *,
1009 dataIds: Optional[Iterable[DataId]] = None,
1010 ) -> None:
1011 """Remove or adjust datasets to clear a validity range within a
1012 calibration collection.
1014 Parameters
1015 ----------
1016 collection : `str`
1017 The name of an already-registered `~CollectionType.CALIBRATION`
1018 collection.
1019 datasetType : `str` or `DatasetType`
1020 Name or `DatasetType` instance for the datasets to be decertified.
1021 timespan : `Timespan`, optional
1022 The validity range to remove datasets from within the collection.
1023 Datasets that overlap this range but are not contained by it will
1024 have their validity ranges adjusted to not overlap it, which may
1025 split a single dataset validity range into two.
1026 dataIds : `Iterable` [ `DataId` ], optional
1027 Data IDs that should be decertified within the given validity range
1028 If `None`, all data IDs for ``self.datasetType`` will be
1029 decertified.
1031 Raises
1032 ------
1033 CollectionTypeError
1034 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1035 collection or if ``datasetType.isCalibration() is False``.
1036 """
1037 raise NotImplementedError()
1039 @abstractmethod
1040 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1041 """Return an object that allows a new `Datastore` instance to
1042 communicate with this `Registry`.
1044 Returns
1045 -------
1046 manager : `DatastoreRegistryBridgeManager`
1047 Object that mediates communication between this `Registry` and its
1048 associated datastores.
1049 """
1050 raise NotImplementedError()
1052 @abstractmethod
1053 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1054 """Retrieve datastore locations for a given dataset.
1056 Parameters
1057 ----------
1058 ref : `DatasetRef`
1059 A reference to the dataset for which to retrieve storage
1060 information.
1062 Returns
1063 -------
1064 datastores : `Iterable` [ `str` ]
1065 All the matching datastores holding this dataset.
1067 Raises
1068 ------
1069 AmbiguousDatasetError
1070 Raised if ``ref.id`` is `None`.
1071 """
1072 raise NotImplementedError()
1074 @abstractmethod
1075 def expandDataId(
1076 self,
1077 dataId: Optional[DataId] = None,
1078 *,
1079 graph: Optional[DimensionGraph] = None,
1080 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
1081 withDefaults: bool = True,
1082 **kwargs: Any,
1083 ) -> DataCoordinate:
1084 """Expand a dimension-based data ID to include additional information.
1086 Parameters
1087 ----------
1088 dataId : `DataCoordinate` or `dict`, optional
1089 Data ID to be expanded; augmented and overridden by ``kwargs``.
1090 graph : `DimensionGraph`, optional
1091 Set of dimensions for the expanded ID. If `None`, the dimensions
1092 will be inferred from the keys of ``dataId`` and ``kwargs``.
1093 Dimensions that are in ``dataId`` or ``kwargs`` but not in
1094 ``graph`` are silently ignored, providing a way to extract and
1095 ``graph`` expand a subset of a data ID.
1096 records : `Mapping` [`str`, `DimensionRecord`], optional
1097 Dimension record data to use before querying the database for that
1098 data, keyed by element name.
1099 withDefaults : `bool`, optional
1100 Utilize ``self.defaults.dataId`` to fill in missing governor
1101 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1102 used).
1103 **kwargs
1104 Additional keywords are treated like additional key-value pairs for
1105 ``dataId``, extending and overriding
1107 Returns
1108 -------
1109 expanded : `DataCoordinate`
1110 A data ID that includes full metadata for all of the dimensions it
1111 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1112 ``expanded.hasFull()`` both return `True`.
1114 Raises
1115 ------
1116 DataIdError
1117 Raised when ``dataId`` or keyword arguments specify unknown
1118 dimensions or values, or when a resulting data ID contains
1119 contradictory key-value pairs, according to dimension
1120 relationships.
1122 Notes
1123 -----
1124 This method cannot be relied upon to reject invalid data ID values
1125 for dimensions that do actually not have any record columns. For
1126 efficiency reasons the records for these dimensions (which have only
1127 dimension key values that are given by the caller) may be constructed
1128 directly rather than obtained from the registry database.
1129 """
1130 raise NotImplementedError()
1132 @abstractmethod
1133 def insertDimensionData(
1134 self,
1135 element: Union[DimensionElement, str],
1136 *data: Union[Mapping[str, Any], DimensionRecord],
1137 conform: bool = True,
1138 replace: bool = False,
1139 skip_existing: bool = False,
1140 ) -> None:
1141 """Insert one or more dimension records into the database.
1143 Parameters
1144 ----------
1145 element : `DimensionElement` or `str`
1146 The `DimensionElement` or name thereof that identifies the table
1147 records will be inserted into.
1148 data : `dict` or `DimensionRecord` (variadic)
1149 One or more records to insert.
1150 conform : `bool`, optional
1151 If `False` (`True` is default) perform no checking or conversions,
1152 and assume that ``element`` is a `DimensionElement` instance and
1153 ``data`` is a one or more `DimensionRecord` instances of the
1154 appropriate subclass.
1155 replace : `bool`, optional
1156 If `True` (`False` is default), replace existing records in the
1157 database if there is a conflict.
1158 skip_existing : `bool`, optional
1159 If `True` (`False` is default), skip insertion if a record with
1160 the same primary key values already exists. Unlike
1161 `syncDimensionData`, this will not detect when the given record
1162 differs from what is in the database, and should not be used when
1163 this is a concern.
1164 """
1165 raise NotImplementedError()
1167 @abstractmethod
1168 def syncDimensionData(
1169 self,
1170 element: Union[DimensionElement, str],
1171 row: Union[Mapping[str, Any], DimensionRecord],
1172 conform: bool = True,
1173 update: bool = False,
1174 ) -> Union[bool, Dict[str, Any]]:
1175 """Synchronize the given dimension record with the database, inserting
1176 if it does not already exist and comparing values if it does.
1178 Parameters
1179 ----------
1180 element : `DimensionElement` or `str`
1181 The `DimensionElement` or name thereof that identifies the table
1182 records will be inserted into.
1183 row : `dict` or `DimensionRecord`
1184 The record to insert.
1185 conform : `bool`, optional
1186 If `False` (`True` is default) perform no checking or conversions,
1187 and assume that ``element`` is a `DimensionElement` instance and
1188 ``data`` is a one or more `DimensionRecord` instances of the
1189 appropriate subclass.
1190 update: `bool`, optional
1191 If `True` (`False` is default), update the existing record in the
1192 database if there is a conflict.
1194 Returns
1195 -------
1196 inserted_or_updated : `bool` or `dict`
1197 `True` if a new row was inserted, `False` if no changes were
1198 needed, or a `dict` mapping updated column names to their old
1199 values if an update was performed (only possible if
1200 ``update=True``).
1202 Raises
1203 ------
1204 ConflictingDefinitionError
1205 Raised if the record exists in the database (according to primary
1206 key lookup) but is inconsistent with the given one.
1207 """
1208 raise NotImplementedError()
1210 @abstractmethod
1211 def queryDatasetTypes(
1212 self,
1213 expression: Any = ...,
1214 *,
1215 components: Optional[bool] = None,
1216 missing: Optional[List[str]] = None,
1217 ) -> Iterable[DatasetType]:
1218 """Iterate over the dataset types whose names match an expression.
1220 Parameters
1221 ----------
1222 expression : `Any`, optional
1223 An expression that fully or partially identifies the dataset types
1224 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1225 ``...`` can be used to return all dataset types, and is the
1226 default. See :ref:`daf_butler_dataset_type_expressions` for more
1227 information.
1228 components : `bool`, optional
1229 If `True`, apply all expression patterns to component dataset type
1230 names as well. If `False`, never apply patterns to components.
1231 If `None` (default), apply patterns to components only if their
1232 parent datasets were not matched by the expression.
1233 Fully-specified component datasets (`str` or `DatasetType`
1234 instances) are always included.
1236 Values other than `False` are deprecated, and only `False` will be
1237 supported after v26. After v27 this argument will be removed
1238 entirely.
1239 missing : `list` of `str`, optional
1240 String dataset type names that were explicitly given (i.e. not
1241 regular expression patterns) but not found will be appended to this
1242 list, if it is provided.
1244 Returns
1245 -------
1246 dataset_types : `Iterable` [ `DatasetType`]
1247 An `Iterable` of `DatasetType` instances whose names match
1248 ``expression``.
1250 Raises
1251 ------
1252 DatasetTypeExpressionError
1253 Raised when ``expression`` is invalid.
1254 """
1255 raise NotImplementedError()
1257 @abstractmethod
1258 def queryCollections(
1259 self,
1260 expression: Any = ...,
1261 datasetType: Optional[DatasetType] = None,
1262 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
1263 flattenChains: bool = False,
1264 includeChains: Optional[bool] = None,
1265 ) -> Sequence[str]:
1266 """Iterate over the collections whose names match an expression.
1268 Parameters
1269 ----------
1270 expression : `Any`, optional
1271 An expression that identifies the collections to return, such as
1272 a `str` (for full matches or partial matches via globs),
1273 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1274 can be used to return all collections, and is the default.
1275 See :ref:`daf_butler_collection_expressions` for more information.
1276 datasetType : `DatasetType`, optional
1277 If provided, only yield collections that may contain datasets of
1278 this type. This is a conservative approximation in general; it may
1279 yield collections that do not have any such datasets.
1280 collectionTypes : `AbstractSet` [ `CollectionType` ] or \
1281 `CollectionType`, optional
1282 If provided, only yield collections of these types.
1283 flattenChains : `bool`, optional
1284 If `True` (`False` is default), recursively yield the child
1285 collections of matching `~CollectionType.CHAINED` collections.
1286 includeChains : `bool`, optional
1287 If `True`, yield records for matching `~CollectionType.CHAINED`
1288 collections. Default is the opposite of ``flattenChains``: include
1289 either CHAINED collections or their children, but not both.
1291 Returns
1292 -------
1293 collections : `Sequence` [ `str` ]
1294 The names of collections that match ``expression``.
1296 Raises
1297 ------
1298 CollectionExpressionError
1299 Raised when ``expression`` is invalid.
1301 Notes
1302 -----
1303 The order in which collections are returned is unspecified, except that
1304 the children of a `~CollectionType.CHAINED` collection are guaranteed
1305 to be in the order in which they are searched. When multiple parent
1306 `~CollectionType.CHAINED` collections match the same criteria, the
1307 order in which the two lists appear is unspecified, and the lists of
1308 children may be incomplete if a child has multiple parents.
1309 """
1310 raise NotImplementedError()
1312 @abstractmethod
1313 def queryDatasets(
1314 self,
1315 datasetType: Any,
1316 *,
1317 collections: Any = None,
1318 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1319 dataId: Optional[DataId] = None,
1320 where: str = "",
1321 findFirst: bool = False,
1322 components: Optional[bool] = None,
1323 bind: Optional[Mapping[str, Any]] = None,
1324 check: bool = True,
1325 **kwargs: Any,
1326 ) -> DatasetQueryResults:
1327 """Query for and iterate over dataset references matching user-provided
1328 criteria.
1330 Parameters
1331 ----------
1332 datasetType
1333 An expression that fully or partially identifies the dataset types
1334 to be queried. Allowed types include `DatasetType`, `str`,
1335 `re.Pattern`, and iterables thereof. The special value ``...`` can
1336 be used to query all dataset types. See
1337 :ref:`daf_butler_dataset_type_expressions` for more information.
1338 collections: optional
1339 An expression that identifies the collections to search, such as a
1340 `str` (for full matches or partial matches via globs), `re.Pattern`
1341 (for partial matches), or iterable thereof. ``...`` can be used to
1342 search all collections (actually just all `~CollectionType.RUN`
1343 collections, because this will still find all datasets).
1344 If not provided, ``self.default.collections`` is used. See
1345 :ref:`daf_butler_collection_expressions` for more information.
1346 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1347 Dimensions to include in the query (in addition to those used
1348 to identify the queried dataset type(s)), either to constrain
1349 the resulting datasets to those for which a matching dimension
1350 exists, or to relate the dataset type's dimensions to dimensions
1351 referenced by the ``dataId`` or ``where`` arguments.
1352 dataId : `dict` or `DataCoordinate`, optional
1353 A data ID whose key-value pairs are used as equality constraints
1354 in the query.
1355 where : `str`, optional
1356 A string expression similar to a SQL WHERE clause. May involve
1357 any column of a dimension table or (as a shortcut for the primary
1358 key column of a dimension table) dimension name. See
1359 :ref:`daf_butler_dimension_expressions` for more information.
1360 findFirst : `bool`, optional
1361 If `True` (`False` is default), for each result data ID, only
1362 yield one `DatasetRef` of each `DatasetType`, from the first
1363 collection in which a dataset of that dataset type appears
1364 (according to the order of ``collections`` passed in). If `True`,
1365 ``collections`` must not contain regular expressions and may not
1366 be ``...``.
1367 components : `bool`, optional
1368 If `True`, apply all dataset expression patterns to component
1369 dataset type names as well. If `False`, never apply patterns to
1370 components. If `None` (default), apply patterns to components only
1371 if their parent datasets were not matched by the expression.
1372 Fully-specified component datasets (`str` or `DatasetType`
1373 instances) are always included.
1375 Values other than `False` are deprecated, and only `False` will be
1376 supported after v26. After v27 this argument will be removed
1377 entirely.
1378 bind : `Mapping`, optional
1379 Mapping containing literal values that should be injected into the
1380 ``where`` expression, keyed by the identifiers they replace.
1381 check : `bool`, optional
1382 If `True` (default) check the query for consistency before
1383 executing it. This may reject some valid queries that resemble
1384 common mistakes (e.g. queries for visits without specifying an
1385 instrument).
1386 **kwargs
1387 Additional keyword arguments are forwarded to
1388 `DataCoordinate.standardize` when processing the ``dataId``
1389 argument (and may be used to provide a constraining data ID even
1390 when the ``dataId`` argument is `None`).
1392 Returns
1393 -------
1394 refs : `queries.DatasetQueryResults`
1395 Dataset references matching the given query criteria. Nested data
1396 IDs are guaranteed to include values for all implied dimensions
1397 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1398 include dimension records (`DataCoordinate.hasRecords` will be
1399 `False`) unless `~queries.DatasetQueryResults.expanded` is called
1400 on the result object (which returns a new one).
1402 Raises
1403 ------
1404 DatasetTypeExpressionError
1405 Raised when ``datasetType`` expression is invalid.
1406 TypeError
1407 Raised when the arguments are incompatible, such as when a
1408 collection wildcard is passed when ``findFirst`` is `True`, or
1409 when ``collections`` is `None` and``self.defaults.collections`` is
1410 also `None`.
1411 DataIdError
1412 Raised when ``dataId`` or keyword arguments specify unknown
1413 dimensions or values, or when they contain inconsistent values.
1414 UserExpressionError
1415 Raised when ``where`` expression is invalid.
1417 Notes
1418 -----
1419 When multiple dataset types are queried in a single call, the
1420 results of this operation are equivalent to querying for each dataset
1421 type separately in turn, and no information about the relationships
1422 between datasets of different types is included. In contexts where
1423 that kind of information is important, the recommended pattern is to
1424 use `queryDataIds` to first obtain data IDs (possibly with the
1425 desired dataset types and collections passed as constraints to the
1426 query), and then use multiple (generally much simpler) calls to
1427 `queryDatasets` with the returned data IDs passed as constraints.
1428 """
1429 raise NotImplementedError()
1431 @abstractmethod
1432 def queryDataIds(
1433 self,
1434 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1435 *,
1436 dataId: Optional[DataId] = None,
1437 datasets: Any = None,
1438 collections: Any = None,
1439 where: str = "",
1440 components: Optional[bool] = None,
1441 bind: Optional[Mapping[str, Any]] = None,
1442 check: bool = True,
1443 **kwargs: Any,
1444 ) -> DataCoordinateQueryResults:
1445 """Query for data IDs matching user-provided criteria.
1447 Parameters
1448 ----------
1449 dimensions : `Dimension` or `str`, or iterable thereof
1450 The dimensions of the data IDs to yield, as either `Dimension`
1451 instances or `str`. Will be automatically expanded to a complete
1452 `DimensionGraph`.
1453 dataId : `dict` or `DataCoordinate`, optional
1454 A data ID whose key-value pairs are used as equality constraints
1455 in the query.
1456 datasets : `Any`, optional
1457 An expression that fully or partially identifies dataset types
1458 that should constrain the yielded data IDs. For example, including
1459 "raw" here would constrain the yielded ``instrument``,
1460 ``exposure``, ``detector``, and ``physical_filter`` values to only
1461 those for which at least one "raw" dataset exists in
1462 ``collections``. Allowed types include `DatasetType`, `str`,
1463 and iterables thereof. Regular expression objects (i.e.
1464 `re.Pattern`) are deprecated and will be removed after the v26
1465 release. See :ref:`daf_butler_dataset_type_expressions` for more
1466 information.
1467 collections: `Any`, optional
1468 An expression that identifies the collections to search for
1469 datasets, such as a `str` (for full matches or partial matches
1470 via globs), `re.Pattern` (for partial matches), or iterable
1471 thereof. ``...`` can be used to search all collections (actually
1472 just all `~CollectionType.RUN` collections, because this will
1473 still find all datasets). If not provided,
1474 ``self.default.collections`` is used. Ignored unless ``datasets``
1475 is also passed. See :ref:`daf_butler_collection_expressions` for
1476 more information.
1477 where : `str`, optional
1478 A string expression similar to a SQL WHERE clause. May involve
1479 any column of a dimension table or (as a shortcut for the primary
1480 key column of a dimension table) dimension name. See
1481 :ref:`daf_butler_dimension_expressions` for more information.
1482 components : `bool`, optional
1483 If `True`, apply all dataset expression patterns to component
1484 dataset type names as well. If `False`, never apply patterns to
1485 components. If `None` (default), apply patterns to components only
1486 if their parent datasets were not matched by the expression.
1487 Fully-specified component datasets (`str` or `DatasetType`
1488 instances) are always included.
1490 Values other than `False` are deprecated, and only `False` will be
1491 supported after v26. After v27 this argument will be removed
1492 entirely.
1493 bind : `Mapping`, optional
1494 Mapping containing literal values that should be injected into the
1495 ``where`` expression, keyed by the identifiers they replace.
1496 check : `bool`, optional
1497 If `True` (default) check the query for consistency before
1498 executing it. This may reject some valid queries that resemble
1499 common mistakes (e.g. queries for visits without specifying an
1500 instrument).
1501 **kwargs
1502 Additional keyword arguments are forwarded to
1503 `DataCoordinate.standardize` when processing the ``dataId``
1504 argument (and may be used to provide a constraining data ID even
1505 when the ``dataId`` argument is `None`).
1507 Returns
1508 -------
1509 dataIds : `queries.DataCoordinateQueryResults`
1510 Data IDs matching the given query parameters. These are guaranteed
1511 to identify all dimensions (`DataCoordinate.hasFull` returns
1512 `True`), but will not contain `DimensionRecord` objects
1513 (`DataCoordinate.hasRecords` returns `False`). Call
1514 `DataCoordinateQueryResults.expanded` on the returned object to
1515 fetch those (and consider using
1516 `DataCoordinateQueryResults.materialize` on the returned object
1517 first if the expected number of rows is very large). See
1518 documentation for those methods for additional information.
1520 Raises
1521 ------
1522 NoDefaultCollectionError
1523 Raised if ``collections`` is `None` and
1524 ``self.defaults.collections`` is `None`.
1525 CollectionExpressionError
1526 Raised when ``collections`` expression is invalid.
1527 DataIdError
1528 Raised when ``dataId`` or keyword arguments specify unknown
1529 dimensions or values, or when they contain inconsistent values.
1530 DatasetTypeExpressionError
1531 Raised when ``datasetType`` expression is invalid.
1532 UserExpressionError
1533 Raised when ``where`` expression is invalid.
1534 """
1535 raise NotImplementedError()
1537 @abstractmethod
1538 def queryDimensionRecords(
1539 self,
1540 element: Union[DimensionElement, str],
1541 *,
1542 dataId: Optional[DataId] = None,
1543 datasets: Any = None,
1544 collections: Any = None,
1545 where: str = "",
1546 components: Optional[bool] = None,
1547 bind: Optional[Mapping[str, Any]] = None,
1548 check: bool = True,
1549 **kwargs: Any,
1550 ) -> DimensionRecordQueryResults:
1551 """Query for dimension information matching user-provided criteria.
1553 Parameters
1554 ----------
1555 element : `DimensionElement` or `str`
1556 The dimension element to obtain records for.
1557 dataId : `dict` or `DataCoordinate`, optional
1558 A data ID whose key-value pairs are used as equality constraints
1559 in the query.
1560 datasets : `Any`, optional
1561 An expression that fully or partially identifies dataset types
1562 that should constrain the yielded records. See `queryDataIds` and
1563 :ref:`daf_butler_dataset_type_expressions` for more information.
1564 collections : `Any`, optional
1565 An expression that identifies the collections to search for
1566 datasets, such as a `str` (for full matches or partial matches
1567 via globs), `re.Pattern` (for partial matches), or iterable
1568 thereof. ``...`` can be used to search all collections (actually
1569 just all `~CollectionType.RUN` collections, because this will
1570 still find all datasets). If not provided,
1571 ``self.default.collections`` is used. Ignored unless ``datasets``
1572 is also passed. See :ref:`daf_butler_collection_expressions` for
1573 more information.
1574 where : `str`, optional
1575 A string expression similar to a SQL WHERE clause. See
1576 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1577 information.
1578 components : `bool`, optional
1579 Whether to apply dataset expressions to components as well.
1580 See `queryDataIds` for more information.
1582 Values other than `False` are deprecated, and only `False` will be
1583 supported after v26. After v27 this argument will be removed
1584 entirely.
1585 bind : `Mapping`, optional
1586 Mapping containing literal values that should be injected into the
1587 ``where`` expression, keyed by the identifiers they replace.
1588 check : `bool`, optional
1589 If `True` (default) check the query for consistency before
1590 executing it. This may reject some valid queries that resemble
1591 common mistakes (e.g. queries for visits without specifying an
1592 instrument).
1593 **kwargs
1594 Additional keyword arguments are forwarded to
1595 `DataCoordinate.standardize` when processing the ``dataId``
1596 argument (and may be used to provide a constraining data ID even
1597 when the ``dataId`` argument is `None`).
1599 Returns
1600 -------
1601 dataIds : `queries.DimensionRecordQueryResults`
1602 Data IDs matching the given query parameters.
1604 Raises
1605 ------
1606 NoDefaultCollectionError
1607 Raised if ``collections`` is `None` and
1608 ``self.defaults.collections`` is `None`.
1609 CollectionExpressionError
1610 Raised when ``collections`` expression is invalid.
1611 DataIdError
1612 Raised when ``dataId`` or keyword arguments specify unknown
1613 dimensions or values, or when they contain inconsistent values.
1614 DatasetTypeExpressionError
1615 Raised when ``datasetType`` expression is invalid.
1616 UserExpressionError
1617 Raised when ``where`` expression is invalid.
1618 """
1619 raise NotImplementedError()
1621 @abstractmethod
1622 def queryDatasetAssociations(
1623 self,
1624 datasetType: Union[str, DatasetType],
1625 collections: Any = ...,
1626 *,
1627 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1628 flattenChains: bool = False,
1629 ) -> Iterator[DatasetAssociation]:
1630 """Iterate over dataset-collection combinations where the dataset is in
1631 the collection.
1633 This method is a temporary placeholder for better support for
1634 association results in `queryDatasets`. It will probably be
1635 removed in the future, and should be avoided in production code
1636 whenever possible.
1638 Parameters
1639 ----------
1640 datasetType : `DatasetType` or `str`
1641 A dataset type object or the name of one.
1642 collections: `Any`, optional
1643 An expression that identifies the collections to search for
1644 datasets, such as a `str` (for full matches or partial matches
1645 via globs), `re.Pattern` (for partial matches), or iterable
1646 thereof. ``...`` can be used to search all collections (actually
1647 just all `~CollectionType.RUN` collections, because this will still
1648 find all datasets). If not provided, ``self.default.collections``
1649 is used. See :ref:`daf_butler_collection_expressions` for more
1650 information.
1651 collectionTypes : `AbstractSet` [ `CollectionType` ], optional
1652 If provided, only yield associations from collections of these
1653 types.
1654 flattenChains : `bool`, optional
1655 If `True` (default) search in the children of
1656 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
1657 collections are ignored.
1659 Yields
1660 ------
1661 association : `.DatasetAssociation`
1662 Object representing the relationship between a single dataset and
1663 a single collection.
1665 Raises
1666 ------
1667 NoDefaultCollectionError
1668 Raised if ``collections`` is `None` and
1669 ``self.defaults.collections`` is `None`.
1670 CollectionExpressionError
1671 Raised when ``collections`` expression is invalid.
1672 """
1673 raise NotImplementedError()
1675 storageClasses: StorageClassFactory
1676 """All storage classes known to the registry (`StorageClassFactory`).
1677 """
1679 datasetIdFactory: DatasetIdFactory
1680 """Factory for dataset IDs."""