Coverage for python/lsst/daf/butler/registry/_registry.py: 62% (185 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations
__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)
from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdFactory, DatasetIdGenEnum
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .wildcards import CollectionSearch
if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create registry database and return `Registry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create `Registry` subclass instance from ``config``.

        Registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
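
        Examples
        --------
        A minimal sketch of constructing a read-only registry from an
        existing repository; the path here is hypothetical, and the
        repository must already contain an initialized registry database::

            from lsst.daf.butler.registry import Registry, RegistryConfig

            # Hypothetical repository path.
            config = RegistryConfig("/path/to/repo/butler.yaml")
            registry = Registry.fromConfig(config, writeable=False)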
224 """
225 # The base class implementation should trampoline to the correct
226 # subclass. No implementation should ever use this implementation
227 # directly. If no class is specified, default to the standard
228 # registry.
229 registry_cls, registry_config = cls.determineTrampoline(config)
230 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by
        other registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after
        the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether the collection was created by this
            call (`True`) or had already been registered (`False`).

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
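
        Examples
        --------
        A sketch of registering a tagged collection; the collection name is
        hypothetical, and ``registry`` is assumed to be an existing
        writeable instance::

            from lsst.daf.butler.registry import CollectionType

            created = registry.registerCollection(
                "u/someone/tagged", CollectionType.TAGGED, doc="Hand-picked datasets."
            )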
343 """
344 raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more
            information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
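
        Examples
        --------
        A sketch of chaining two existing runs behind one search name; all
        collection names are hypothetical, and ``registry`` is an existing
        writeable instance::

            from lsst.daf.butler.registry import CollectionType

            registry.registerCollection("shared/chain", CollectionType.CHAINED)
            registry.setCollectionChain("shared/chain", ["run/b", "run/a"])
            # Searches of "shared/chain" now look in run/b first, then run/a.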
495 """
496 raise NotImplementedError()

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """
        Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other butler processes that are
            active and have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested dataset type could not be found in the
            registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        KeyError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the
        behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and
        that adding additional collections that do not contain a match to
        the search path never changes the behavior.
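
        Examples
        --------
        A sketch of a single-dataset lookup; the dataset type, data ID
        values, and collection name are all hypothetical::

            ref = registry.findDataset(
                "flat",
                instrument="HSC",
                detector=50,
                physical_filter="HSC-I",
                collections="HSC/calib/unbounded",
            )
            if ref is None:
                print("No matching flat found.")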
731 """
732 raise NotImplementedError()

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique
            IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
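
        Examples
        --------
        A sketch of inserting two raw datasets into an existing run; the
        dataset type, data ID values, and run name are hypothetical, and
        the dataset type and run must already be registered::

            refs = registry.insertDatasets(
                "raw",
                dataIds=[
                    {"instrument": "HSC", "exposure": 903334, "detector": 16},
                    {"instrument": "HSC", "exposure": 903334, "detector": 17},
                ],
                run="HSC/raw/all",
            )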
785 """
786 raise NotImplementedError()

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`), then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend, then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs when IDs are not
            provided or their type does not match backend type. By default
            unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an
            exception will be raised if imported IDs clash with existing
            ones. This option has no effect on the use of globally-unique
            IDs which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database, then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not
        use this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        as having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType`
        and data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
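
        Examples
        --------
        A sketch of certifying already-registered calibration datasets; the
        collection name, ``refs``, and validity range are hypothetical::

            import astropy.time

            from lsst.daf.butler import Timespan

            begin = astropy.time.Time("2021-01-01T00:00:00", scale="tai")
            end = astropy.time.Time("2021-07-01T00:00:00", scale="tai")
            # `refs` must be resolved DatasetRef instances for a
            # calibration dataset type (e.g. "bias").
            registry.certify("HSC/calib", refs, Timespan(begin, end))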
988 """
989 raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the
            collection. Datasets that overlap this range but are not
            contained by it will have their validity ranges adjusted to not
            overlap it, which may split a single dataset validity range into
            two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()``
            and ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.
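
        Examples
        --------
        A sketch of expanding a minimal exposure data ID; the instrument
        and exposure values are hypothetical::

            data_id = registry.expandDataId(instrument="HSC", exposure=903334)
            # The expanded ID now carries dimension records.
            assert data_id.hasFull() and data_id.hasRecords()
            print(data_id.records["exposure"].observation_type)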
1110 """
1111 raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` is one or more `DimensionRecord` instances
            of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used
            when this is a concern.
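
        Examples
        --------
        A sketch of inserting a single instrument record; the field values
        are hypothetical, and the set of required fields is defined by the
        dimension universe in use::

            registry.insertDimensionData(
                "instrument",
                {
                    "name": "MyCam",
                    "visit_max": 999_999,
                    "exposure_max": 999_999,
                    "detector_max": 99,
                    "class_name": "lsst.obs.mycam.MyCam",
                },
            )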
1145 """
1146 raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to
            primary key lookup) but is inconsistent with the given one.
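
        Examples
        --------
        A sketch of idempotently ensuring a record exists; the field values
        are hypothetical::

            inserted = registry.syncDimensionData(
                "instrument",
                {
                    "name": "MyCam",
                    "visit_max": 999_999,
                    "exposure_max": 999_999,
                    "detector_max": 99,
                    "class_name": "lsst.obs.mycam.MyCam",
                },
            )
            # `True` on the first call; `False` on identical repeat calls.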
1188 """
1189 raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which the two lists appear is unspecified,
        and the lists of children may be incomplete if a child has multiple
        parents.
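
        Examples
        --------
        A sketch of listing all RUN collections whose names start with a
        hypothetical prefix::

            from lsst.daf.butler.registry import CollectionType

            for name in registry.queryCollections(
                "HSC/runs/*", collectionTypes=CollectionType.RUN
            ):
                print(name)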
1285 """
1286 raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.default.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each
        dataset type separately in turn, and no information about the
        relationships between datasets of different types is included. In
        contexts where that kind of information is important, the
        recommended pattern is to use `queryDataIds` to first obtain data
        IDs (possibly with the desired dataset types and collections passed
        as constraints to the query), and then use multiple (generally much
        simpler) calls to `queryDatasets` with the returned data IDs passed
        as constraints.
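
        Examples
        --------
        A sketch of a constrained query; the dataset type, collection, and
        data ID values are hypothetical::

            refs = registry.queryDatasets(
                "calexp",
                collections="HSC/runs/RC2",
                where="visit = 903334 AND detector IN (16, 17)",
                instrument="HSC",
            )
            for ref in refs:
                print(ref.dataId)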
1400 """
1401 raise NotImplementedError()

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, ``...`` is not permitted - it doesn't make sense to
            constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `queries.DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions
            (`DataCoordinate.hasFull` returns `True`), but will not contain
            `DimensionRecord` objects (`DataCoordinate.hasRecords` returns
            `False`). Call `DataCoordinateQueryResults.expanded` on the
            returned object to fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when ``where`` expression is invalid.
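
        Examples
        --------
        A sketch of querying for data IDs constrained by the existence of
        datasets; all names and values are hypothetical::

            data_ids = registry.queryDataIds(
                ["exposure", "detector"],
                datasets="raw",
                collections="HSC/raw/all",
                instrument="HSC",
            )
            for data_id in data_ids.expanded():
                print(data_id)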
1503 """
1504 raise NotImplementedError()

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds`
            and :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `queries.DimensionRecordQueryResults`
            Dimension records matching the given query parameters.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when ``where`` expression is invalid.
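
        Examples
        --------
        A sketch of fetching detector records for a hypothetical
        instrument::

            for record in registry.queryDimensionRecords(
                "detector", instrument="HSC"
            ):
                print(record.id, record.full_name)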
1583 """
1584 raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.default.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `.DatasetAssociation`
            Object representing the relationship between a single dataset
            and a single collection.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """

    datasetIdFactory: DatasetIdFactory
    """Factory for dataset IDs."""