# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .interfaces import DatasetIdFactory, DatasetIdGenEnum
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be None if no defaults
    specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
    ) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
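
    # A minimal usage sketch (hypothetical repository path): ``fromConfig``
    # trampolines to the concrete subclass named by the ``cls`` entry in the
    # configuration, defaulting to
    # ``lsst.daf.butler.registries.sql.SqlRegistry``:
    #
    #     config = RegistryConfig("/path/to/repo/butler.yaml")
    #     registry = Registry.fromConfig(config, writeable=False)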

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Definitions of all dimensions recognized by this `Registry`
        (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value
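
    # Replacing the defaults struct wholesale (a sketch with hypothetical
    # collection names); note that assigning registers the output run if one
    # is given:
    #
    #     registry.defaults = RegistryDefaults(
    #         collections=["HSC/defaults"], run="u/someone/processing"
    #     )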

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction."""
        raise NotImplementedError()
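
    # Transaction usage sketch: operations inside the ``with`` block commit
    # together and roll back on error; ``savepoint=True`` lets the block nest
    # inside an outer transaction (record contents hypothetical):
    #
    #     with registry.transaction(savepoint=True):
    #         registry.insertDimensionData("instrument", {"name": "DummyCam"})
    #         ...  # any failure here rolls back the insert above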

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry, if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after
        the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            `True` if the collection was created by this call; `False` if it
            was already registered.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()
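
    # Registration sketch (hypothetical collection names); the collection
    # type determines how datasets may be associated with it later:
    #
    #     registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
    #     registry.registerRun("u/someone/run-w10")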

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Remove the given collection from the registry.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        sqlalchemy.IntegrityError
            Raised if the database rows associated with the collection are
            still referenced by some other table, such as a dataset in a
            datastore (for `~CollectionType.RUN` collections only) or a
            `~CollectionType.CHAINED` collection of which this collection is
            a child.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and
        quanta in it will be removed from the `Registry` database. This
        requires that those datasets be removed (or at least trashed) from
        any datastores that hold them first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> Sequence[str]:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `Sequence` [ `str` ]
            An ordered sequence of collection names that are searched when
            the given chained collection is searched.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        CollectionTypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
        """
        raise NotImplementedError()
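
    # Chained-collection sketch (hypothetical names): the parent must be
    # registered as CHAINED first; search order follows the list order.
    #
    #     registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain("HSC/defaults", ["HSC/raw/all", "HSC/calib"])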

    @abstractmethod
    def getCollectionParentChains(self, collection: str) -> Set[str]:
        """Return the CHAINED collections that directly contain the given
        one.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        chains : `set` of `str`
            Set of `~CollectionType.CHAINED` collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other active butler processes that
            have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the requested dataset type has not been registered.

        Notes
        -----
        This method handles component dataset types automatically, though
        most other registry operations do not.
        """
        raise NotImplementedError()

    @abstractmethod
    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insertDatasets`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the
        parent will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        MissingDatasetTypeError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception
        even when the set of collections searched is intrinsically
        incompatible with the dataset type, e.g. if
        ``datasetType.isCalibration() is False``, but only
        `~CollectionType.CALIBRATION` collections are being searched.
        This may make it harder to debug some lookup failures, but the
        behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and
        that adding additional collections that do not contain a match to
        the search path never changes the behavior.

        This method handles component dataset types automatically, though
        most other registry operations do not.
        """
        raise NotImplementedError()
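
    # Lookup sketch (hypothetical dataset type, data ID values, and
    # collection): keyword arguments augment the data ID.
    #
    #     ref = registry.findDataset(
    #         "raw",
    #         instrument="HSC",
    #         exposure=903334,
    #         detector=10,
    #         collections="HSC/raw/all",
    #     )
    #     if ref is None:
    #         print("no matching raw found")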

    @abstractmethod
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs. By default unique
            IDs are generated for each inserted dataset.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        DatasetTypeError
            Raised if ``datasetType`` is not known to registry.
        CollectionTypeError
            Raised if ``run`` collection type is not `~CollectionType.RUN`.
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
        """
        raise NotImplementedError()
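
    # Insertion sketch (hypothetical dataset type, data ID, and run name);
    # the dataset type and run must already be registered:
    #
    #     refs = registry.insertDatasets(
    #         "raw",
    #         dataIds=[{"instrument": "HSC", "exposure": 903334, "detector": 10}],
    #         run="HSC/raw/all",
    #     )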

    @abstractmethod
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        """Import one or more datasets into the `Registry`.

        The difference from the `insertDatasets` method is that this method
        accepts `DatasetRef` instances which should already be resolved and
        have a dataset ID. If the registry supports globally-unique dataset
        IDs (e.g. `uuid.UUID`) then datasets which already exist in the
        registry will be ignored if imported again.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. All `DatasetRef` instances must have
            identical ``datasetType`` and ``run`` attributes. The ``run``
            attribute can be `None` and defaults to ``self.defaults.run``.
            Datasets can specify an ``id`` attribute which will be used for
            inserted datasets. All dataset IDs must have the same type
            (`int` or `uuid.UUID`); if the type of the dataset IDs does not
            match the configured backend then the IDs will be ignored and
            new IDs will be generated by the backend.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.
        idGenerationMode : `DatasetIdGenEnum`, optional
            Specifies option for generating dataset IDs when IDs are not
            provided or their type does not match backend type. By default
            unique IDs are generated for each inserted dataset.
        reuseIds : `bool`, optional
            If `True` then forces re-use of imported dataset IDs for integer
            IDs which are normally generated as auto-incremented; an
            exception will be raised if imported IDs clash with existing
            ones. This option has no effect on the use of globally-unique
            IDs which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order). If any of ``datasets`` has an ID which already
            exists in the database then it will not be inserted or updated,
            but a resolved `DatasetRef` will be returned for it in any case.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        DatasetTypeError
            Raised if datasets correspond to more than one dataset type or
            the dataset type is not known to the registry.
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.

        Notes
        -----
        This method is considered package-private and internal to the Butler
        implementation. Clients outside the daf_butler package should not
        use this method.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact ID is already in a collection
        nothing is changed. If a `DatasetRef` with the same `DatasetType`
        and data ID but with different ID exists in the collection,
        `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already
            exist in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        CollectionTypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
        """
        raise NotImplementedError()
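
    # Certification sketch (hypothetical refs and collection name):
    # `Timespan` bounds are `astropy.time.Time` values; `None` means
    # unbounded on that side.
    #
    #     from astropy.time import Time
    #
    #     registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
    #     registry.certify(
    #         "HSC/calib",
    #         refs,  # resolved DatasetRefs for e.g. "bias" datasets
    #         Timespan(Time("2022-01-01", scale="tai"), None),
    #     )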

    @abstractmethod
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be
            decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the
            collection. Datasets that overlap this range but are not
            contained by it will have their validity ranges adjusted to not
            overlap it, which may split a single dataset validity range into
            two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional
        information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults
            are used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()``
            and ``expanded.hasFull()`` both return `True`.

        Raises
        ------
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when a resulting data ID contains
            contradictory key-value pairs, according to dimension
            relationships.
        """
        raise NotImplementedError()
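
    # Expansion sketch (hypothetical data ID; record field name assumed):
    # the returned `DataCoordinate` carries full dimension records.
    #
    #     expanded = registry.expandDataId(instrument="HSC", detector=50)
    #     assert expanded.hasRecords() and expanded.hasFull()
    #     print(expanded.records["detector"].full_name)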

    @abstractmethod
    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` is one or more `DimensionRecord` instances
            of the appropriate subclass.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists. Unlike
            `syncDimensionData`, this will not detect when the given record
            differs from what is in the database, and should not be used
            when this is a concern.
        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        """Synchronize the given dimension record with the database,
        inserting if it does not already exist and comparing values if it
        does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in
            the database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to
            primary key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()
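
    # Dimension-record sketch (hypothetical record contents): sync inserts
    # the record if absent and reports what, if anything, changed.
    #
    #     inserted = registry.syncDimensionData(
    #         "instrument",
    #         {"name": "DummyCam", "class_name": "lsst.obs.dummy.DummyCam"},
    #     )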

    @abstractmethod
    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. ``...`` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.

        Returns
        -------
        dataset_types : `Iterable` [ `DatasetType` ]
            An `Iterable` of `DatasetType` instances whose names match
            ``expression``.

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``expression`` is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ] or \
                `CollectionType`, optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Returns
        -------
        collections : `Sequence` [ `str` ]
            The names of collections that match ``expression``.

        Raises
        ------
        CollectionExpressionError
            Raised when ``expression`` is invalid.

        Notes
        -----
        The order in which collections are returned is unspecified, except
        that the children of a `~CollectionType.CHAINED` collection are
        guaranteed to be in the order in which they are searched. When
        multiple parent `~CollectionType.CHAINED` collections match the same
        criteria, the order in which the two lists appear is unspecified,
        and the lists of children may be incomplete if a child has multiple
        parents.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: Optional[str] = None,
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType : `Any`
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value ``...``
            can be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches or partial matches via globs),
            `re.Pattern` (for partial matches), or iterable thereof. ``...``
            can be used to search all collections (actually just all
            `~CollectionType.RUN` collections, because this will still find
            all datasets). If not provided, ``self.defaults.collections`` is
            used. See :ref:`daf_butler_collection_expressions` for more
            information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested
            data IDs are guaranteed to include values for all implied
            dimensions (i.e. `DataCoordinate.hasFull` will return `True`),
            but will not include dimension records
            (`DataCoordinate.hasRecords` will be `False`) unless
            `~queries.DatasetQueryResults.expanded` is called on the result
            object (which returns a new one).

        Raises
        ------
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        UserExpressionError
            Raised when ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each
        dataset type separately in turn, and no information about the
        relationships between datasets of different types is included. In
        contexts where that kind of information is important, the
        recommended pattern is to use `queryDataIds` to first obtain data
        IDs (possibly with the desired dataset types and collections passed
        as constraints to the query), and then use multiple (generally much
        simpler) calls to `queryDatasets` with the returned data IDs passed
        as constraints.
        """
        raise NotImplementedError()
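
    # Query sketch (hypothetical dataset type, collection, and data ID
    # values): ``bind`` keeps literal values out of the ``where`` string.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections="HSC/runs/RC2",
    #         where="visit = my_visit AND detector != 50",
    #         bind={"my_visit": 903334},
    #         instrument="HSC",
    #     )
    #     for ref in refs:
    #         print(ref.dataId["detector"], ref.run)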

    @abstractmethod
    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            and iterables thereof. Regular expression objects (i.e.
            `re.Pattern`) are deprecated and will be removed after the v26
            release. See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the
            primary key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `queries.DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions (`DataCoordinate.hasFull`
            returns `True`), but will not contain `DimensionRecord` objects
            (`DataCoordinate.hasRecords` returns `False`). Call
            `DataCoordinateQueryResults.expanded` on the returned object to
            fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when ``where`` expression is invalid.
        """
        raise NotImplementedError()
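
    # Data-ID query sketch (hypothetical dataset type and collection; record
    # field name assumed): constrain visit/detector pairs to those with at
    # least one "raw" dataset.
    #
    #     dataIds = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="HSC/raw/all",
    #         instrument="HSC",
    #     )
    #     for dataId in dataIds.expanded():
    #         print(dataId["visit"], dataId.records["visit"].exposure_time)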

    @abstractmethod
    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: Optional[str] = None,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds`
            and :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `queries.DimensionRecordQueryResults`
            Dimension records matching the given query parameters.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        DataIdError
            Raised when ``dataId`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        DatasetTypeExpressionError
            Raised when ``datasetType`` expression is invalid.
        UserExpressionError
            Raised when ``where`` expression is invalid.
        """
        raise NotImplementedError()
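
    # Record query sketch (hypothetical instrument value; record field names
    # assumed):
    #
    #     for record in registry.queryDimensionRecords(
    #         "detector",
    #         where="instrument = 'HSC' AND detector.purpose = 'SCIENCE'",
    #     ):
    #         print(record.id, record.full_name)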

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches or partial matches
            via globs), `re.Pattern` (for partial matches), or iterable
            thereof. ``...`` can be used to search all collections (actually
            just all `~CollectionType.RUN` collections, because this will
            still find all datasets). If not provided,
            ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `.DatasetAssociation`
            Object representing the relationship between a single dataset
            and a single collection.

        Raises
        ------
        NoDefaultCollectionError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        CollectionExpressionError
            Raised when ``collections`` expression is invalid.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """

    datasetIdFactory: DatasetIdFactory
    """Factory for dataset IDs."""