Coverage for python/lsst/daf/butler/registry/_registry.py: 78%
143 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-03 09:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-03 09:15 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Registry",)
26import contextlib
27import logging
28from abc import ABC, abstractmethod
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Mapping,
37 Optional,
38 Sequence,
39 Set,
40 Tuple,
41 Type,
42 Union,
43)
45from lsst.resources import ResourcePathExpression
46from lsst.utils import doImportType
48from ..core import (
49 Config,
50 DataCoordinate,
51 DataId,
52 DatasetAssociation,
53 DatasetId,
54 DatasetIdFactory,
55 DatasetIdGenEnum,
56 DatasetRef,
57 DatasetType,
58 Dimension,
59 DimensionConfig,
60 DimensionElement,
61 DimensionGraph,
62 DimensionRecord,
63 DimensionUniverse,
64 NameLookupMapping,
65 StorageClassFactory,
66 Timespan,
67)
68from ._collection_summary import CollectionSummary
69from ._collectionType import CollectionType
70from ._config import RegistryConfig
71from ._defaults import RegistryDefaults
72from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
74if TYPE_CHECKING:
75 from .._butlerConfig import ButlerConfig
76 from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager
78_LOG = logging.getLogger(__name__)
81class Registry(ABC):
82 """Abstract Registry interface.
84 Each registry implementation can have its own constructor parameters.
85 The assumption is that an instance of a specific subclass will be
86 constructed from configuration using `Registry.fromConfig()`.
87 The base class will look for a ``cls`` entry and call that specific
88 `fromConfig()` method.
90 All subclasses should store `RegistryDefaults` in a ``_defaults``
91 property. No other properties are assumed shared between implementations.
92 """
94 defaultConfigFile: Optional[str] = None
95 """Path to configuration defaults. Accessed within the ``configs`` resource
96 or relative to a search path. Can be None if no defaults specified.
97 """
99 @classmethod
100 def forceRegistryConfig(
101 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
102 ) -> RegistryConfig:
103 """Force the supplied config to a `RegistryConfig`.
105 Parameters
106 ----------
107 config : `RegistryConfig`, `Config` or `str` or `None`
108 Registry configuration, if missing then default configuration will
109 be loaded from registry.yaml.
111 Returns
112 -------
113 registry_config : `RegistryConfig`
114 A registry config.
115 """
116 if not isinstance(config, RegistryConfig):
117 if isinstance(config, (str, Config)) or config is None:
118 config = RegistryConfig(config)
119 else:
120 raise ValueError(f"Incompatible Registry configuration: {config}")
121 return config
123 @classmethod
124 def determineTrampoline(
125 cls, config: Optional[Union[ButlerConfig, RegistryConfig, Config, str]]
126 ) -> Tuple[Type[Registry], RegistryConfig]:
127 """Return class to use to instantiate real registry.
129 Parameters
130 ----------
131 config : `RegistryConfig` or `str`, optional
132 Registry configuration, if missing then default configuration will
133 be loaded from registry.yaml.
135 Returns
136 -------
137 requested_cls : `type` of `Registry`
138 The real registry class to use.
139 registry_config : `RegistryConfig`
140 The `RegistryConfig` to use.
141 """
142 config = cls.forceRegistryConfig(config)
144 # Default to the standard registry
145 registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
146 registry_cls = doImportType(registry_cls_name)
147 if registry_cls is cls:
148 raise ValueError("Can not instantiate the abstract base Registry from config")
149 if not issubclass(registry_cls, Registry):
150 raise TypeError(
151 f"Registry class obtained from config {registry_cls_name} is not a Registry class."
152 )
153 return registry_cls, config
155 @classmethod
156 def createFromConfig(
157 cls,
158 config: Optional[Union[RegistryConfig, str]] = None,
159 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
160 butlerRoot: Optional[ResourcePathExpression] = None,
161 ) -> Registry:
162 """Create registry database and return `Registry` instance.
164 This method initializes database contents, database must be empty
165 prior to calling this method.
167 Parameters
168 ----------
169 config : `RegistryConfig` or `str`, optional
170 Registry configuration, if missing then default configuration will
171 be loaded from registry.yaml.
172 dimensionConfig : `DimensionConfig` or `str`, optional
173 Dimensions configuration, if missing then default configuration
174 will be loaded from dimensions.yaml.
175 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
176 Path to the repository root this `Registry` will manage.
178 Returns
179 -------
180 registry : `Registry`
181 A new `Registry` instance.
183 Notes
184 -----
185 This class will determine the concrete `Registry` subclass to
186 use from configuration. Each subclass should implement this method
187 even if it can not create a registry.
188 """
189 registry_cls, registry_config = cls.determineTrampoline(config)
190 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)
192 @classmethod
193 def fromConfig(
194 cls,
195 config: Union[ButlerConfig, RegistryConfig, Config, str],
196 butlerRoot: Optional[ResourcePathExpression] = None,
197 writeable: bool = True,
198 defaults: Optional[RegistryDefaults] = None,
199 ) -> Registry:
200 """Create `Registry` subclass instance from `config`.
202 Registry database must be initialized prior to calling this method.
204 Parameters
205 ----------
206 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
207 Registry configuration
208 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
209 Path to the repository root this `Registry` will manage.
210 writeable : `bool`, optional
211 If `True` (default) create a read-write connection to the database.
212 defaults : `RegistryDefaults`, optional
213 Default collection search path and/or output `~CollectionType.RUN`
214 collection.
216 Returns
217 -------
218 registry : `Registry` (subclass)
219 A new `Registry` subclass instance.
221 Notes
222 -----
223 This class will determine the concrete `Registry` subclass to
224 use from configuration. Each subclass should implement this method.
225 """
226 # The base class implementation should trampoline to the correct
227 # subclass. No implementation should ever use this implementation
228 # directly. If no class is specified, default to the standard
229 # registry.
230 registry_cls, registry_config = cls.determineTrampoline(config)
231 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
233 @abstractmethod
234 def isWriteable(self) -> bool:
235 """Return `True` if this registry allows write operations, and `False`
236 otherwise.
237 """
238 raise NotImplementedError()
240 @abstractmethod
241 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
242 """Create a new `Registry` backed by the same data repository and
243 connection as this one, but independent defaults.
245 Parameters
246 ----------
247 defaults : `RegistryDefaults`, optional
248 Default collections and data ID values for the new registry. If
249 not provided, ``self.defaults`` will be used (but future changes
250 to either registry's defaults will not affect the other).
252 Returns
253 -------
254 copy : `Registry`
255 A new `Registry` instance with its own defaults.
257 Notes
258 -----
259 Because the new registry shares a connection with the original, they
260 also share transaction state (despite the fact that their `transaction`
261 context manager methods do not reflect this), and must be used with
262 care.
263 """
264 raise NotImplementedError()
266 @property
267 @abstractmethod
268 def dimensions(self) -> DimensionUniverse:
269 """Definitions of all dimensions recognized by this `Registry`
270 (`DimensionUniverse`).
271 """
272 raise NotImplementedError()
274 @property
275 def defaults(self) -> RegistryDefaults:
276 """Default collection search path and/or output `~CollectionType.RUN`
277 collection (`RegistryDefaults`).
279 This is an immutable struct whose components may not be set
280 individually, but the entire struct can be set by assigning to this
281 property.
282 """
283 return self._defaults
285 @defaults.setter
286 def defaults(self, value: RegistryDefaults) -> None:
287 if value.run is not None:
288 self.registerRun(value.run)
289 value.finish(self)
290 self._defaults = value
292 @abstractmethod
293 def refresh(self) -> None:
294 """Refresh all in-memory state by querying the database.
296 This may be necessary to enable querying for entities added by other
297 registry instances after this one was constructed.
298 """
299 raise NotImplementedError()
301 @contextlib.contextmanager
302 @abstractmethod
303 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
304 """Return a context manager that represents a transaction."""
305 raise NotImplementedError()
307 def resetConnectionPool(self) -> None:
308 """Reset connection pool for registry if relevant.
310 This operation can be used reset connections to servers when
311 using registry with fork-based multiprocessing. This method should
312 usually be called by the child process immediately
313 after the fork.
315 The base class implementation is a no-op.
316 """
317 pass
319 @abstractmethod
320 def registerCollection(
321 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
322 ) -> bool:
323 """Add a new collection if one with the given name does not exist.
325 Parameters
326 ----------
327 name : `str`
328 The name of the collection to create.
329 type : `CollectionType`
330 Enum value indicating the type of collection to create.
331 doc : `str`, optional
332 Documentation string for the collection.
334 Returns
335 -------
336 registered : `bool`
337 Boolean indicating whether the collection was already registered
338 or was created by this call.
340 Notes
341 -----
342 This method cannot be called within transactions, as it needs to be
343 able to perform its own transaction to be concurrent.
344 """
345 raise NotImplementedError()
347 @abstractmethod
348 def getCollectionType(self, name: str) -> CollectionType:
349 """Return an enumeration value indicating the type of the given
350 collection.
352 Parameters
353 ----------
354 name : `str`
355 The name of the collection.
357 Returns
358 -------
359 type : `CollectionType`
360 Enum value indicating the type of this collection.
362 Raises
363 ------
364 MissingCollectionError
365 Raised if no collection with the given name exists.
366 """
367 raise NotImplementedError()
369 @abstractmethod
370 def _get_collection_record(self, name: str) -> CollectionRecord:
371 """Return the record for this collection.
373 Parameters
374 ----------
375 name : `str`
376 Name of the collection for which the record is to be retrieved.
378 Returns
379 -------
380 record : `CollectionRecord`
381 The record for this collection.
382 """
383 raise NotImplementedError()
385 @abstractmethod
386 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
387 """Add a new run if one with the given name does not exist.
389 Parameters
390 ----------
391 name : `str`
392 The name of the run to create.
393 doc : `str`, optional
394 Documentation string for the collection.
396 Returns
397 -------
398 registered : `bool`
399 Boolean indicating whether a new run was registered. `False`
400 if it already existed.
402 Notes
403 -----
404 This method cannot be called within transactions, as it needs to be
405 able to perform its own transaction to be concurrent.
406 """
407 raise NotImplementedError()
409 @abstractmethod
410 def removeCollection(self, name: str) -> None:
411 """Remove the given collection from the registry.
413 Parameters
414 ----------
415 name : `str`
416 The name of the collection to remove.
418 Raises
419 ------
420 MissingCollectionError
421 Raised if no collection with the given name exists.
422 sqlalchemy.IntegrityError
423 Raised if the database rows associated with the collection are
424 still referenced by some other table, such as a dataset in a
425 datastore (for `~CollectionType.RUN` collections only) or a
426 `~CollectionType.CHAINED` collection of which this collection is
427 a child.
429 Notes
430 -----
431 If this is a `~CollectionType.RUN` collection, all datasets and quanta
432 in it will removed from the `Registry` database. This requires that
433 those datasets be removed (or at least trashed) from any datastores
434 that hold them first.
436 A collection may not be deleted as long as it is referenced by a
437 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
438 be deleted or redefined first.
439 """
440 raise NotImplementedError()
442 @abstractmethod
443 def getCollectionChain(self, parent: str) -> Sequence[str]:
444 """Return the child collections in a `~CollectionType.CHAINED`
445 collection.
447 Parameters
448 ----------
449 parent : `str`
450 Name of the chained collection. Must have already been added via
451 a call to `Registry.registerCollection`.
453 Returns
454 -------
455 children : `Sequence` [ `str` ]
456 An ordered sequence of collection names that are searched when the
457 given chained collection is searched.
459 Raises
460 ------
461 MissingCollectionError
462 Raised if ``parent`` does not exist in the `Registry`.
463 CollectionTypeError
464 Raised if ``parent`` does not correspond to a
465 `~CollectionType.CHAINED` collection.
466 """
467 raise NotImplementedError()
469 @abstractmethod
470 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
471 """Define or redefine a `~CollectionType.CHAINED` collection.
473 Parameters
474 ----------
475 parent : `str`
476 Name of the chained collection. Must have already been added via
477 a call to `Registry.registerCollection`.
478 children : `Any`
479 An expression defining an ordered search of child collections,
480 generally an iterable of `str`; see
481 :ref:`daf_butler_collection_expressions` for more information.
482 flatten : `bool`, optional
483 If `True` (`False` is default), recursively flatten out any nested
484 `~CollectionType.CHAINED` collections in ``children`` first.
486 Raises
487 ------
488 MissingCollectionError
489 Raised when any of the given collections do not exist in the
490 `Registry`.
491 CollectionTypeError
492 Raised if ``parent`` does not correspond to a
493 `~CollectionType.CHAINED` collection.
494 ValueError
495 Raised if the given collections contains a cycle.
496 """
497 raise NotImplementedError()
499 @abstractmethod
500 def getCollectionParentChains(self, collection: str) -> Set[str]:
501 """Return the CHAINED collections that directly contain the given one.
503 Parameters
504 ----------
505 name : `str`
506 Name of the collection.
508 Returns
509 -------
510 chains : `set` of `str`
511 Set of `~CollectionType.CHAINED` collection names.
512 """
513 raise NotImplementedError()
515 @abstractmethod
516 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
517 """Retrieve the documentation string for a collection.
519 Parameters
520 ----------
521 name : `str`
522 Name of the collection.
524 Returns
525 -------
526 docs : `str` or `None`
527 Docstring for the collection with the given name.
528 """
529 raise NotImplementedError()
531 @abstractmethod
532 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
533 """Set the documentation string for a collection.
535 Parameters
536 ----------
537 name : `str`
538 Name of the collection.
539 docs : `str` or `None`
540 Docstring for the collection with the given name; will replace any
541 existing docstring. Passing `None` will remove any existing
542 docstring.
543 """
544 raise NotImplementedError()
546 @abstractmethod
547 def getCollectionSummary(self, collection: str) -> CollectionSummary:
548 """Return a summary for the given collection.
550 Parameters
551 ----------
552 collection : `str`
553 Name of the collection for which a summary is to be retrieved.
555 Returns
556 -------
557 summary : `CollectionSummary`
558 Summary of the dataset types and governor dimension values in
559 this collection.
560 """
561 raise NotImplementedError()
563 @abstractmethod
564 def registerDatasetType(self, datasetType: DatasetType) -> bool:
565 """
566 Add a new `DatasetType` to the Registry.
568 It is not an error to register the same `DatasetType` twice.
570 Parameters
571 ----------
572 datasetType : `DatasetType`
573 The `DatasetType` to be added.
575 Returns
576 -------
577 inserted : `bool`
578 `True` if ``datasetType`` was inserted, `False` if an identical
579 existing `DatsetType` was found. Note that in either case the
580 DatasetType is guaranteed to be defined in the Registry
581 consistently with the given definition.
583 Raises
584 ------
585 ValueError
586 Raised if the dimensions or storage class are invalid.
587 ConflictingDefinitionError
588 Raised if this DatasetType is already registered with a different
589 definition.
591 Notes
592 -----
593 This method cannot be called within transactions, as it needs to be
594 able to perform its own transaction to be concurrent.
595 """
596 raise NotImplementedError()
598 @abstractmethod
599 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
600 """Remove the named `DatasetType` from the registry.
602 .. warning::
604 Registry implementations can cache the dataset type definitions.
605 This means that deleting the dataset type definition may result in
606 unexpected behavior from other butler processes that are active
607 that have not seen the deletion.
609 Parameters
610 ----------
611 name : `str` or `tuple[str, ...]`
612 Name of the type to be removed or tuple containing a list of type
613 names to be removed. Wildcards are allowed.
615 Raises
616 ------
617 lsst.daf.butler.registry.OrphanedRecordError
618 Raised if an attempt is made to remove the dataset type definition
619 when there are already datasets associated with it.
621 Notes
622 -----
623 If the dataset type is not registered the method will return without
624 action.
625 """
626 raise NotImplementedError()
628 @abstractmethod
629 def getDatasetType(self, name: str) -> DatasetType:
630 """Get the `DatasetType`.
632 Parameters
633 ----------
634 name : `str`
635 Name of the type.
637 Returns
638 -------
639 type : `DatasetType`
640 The `DatasetType` associated with the given name.
642 Raises
643 ------
644 MissingDatasetTypeError
645 Raised if the requested dataset type has not been registered.
647 Notes
648 -----
649 This method handles component dataset types automatically, though most
650 other registry operations do not.
651 """
652 raise NotImplementedError()
654 @abstractmethod
655 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
656 """Test whether the given dataset ID generation mode is supported by
657 `insertDatasets`.
659 Parameters
660 ----------
661 mode : `DatasetIdGenEnum`
662 Enum value for the mode to test.
664 Returns
665 -------
666 supported : `bool`
667 Whether the given mode is supported.
668 """
669 raise NotImplementedError()
671 @abstractmethod
672 def findDataset(
673 self,
674 datasetType: Union[DatasetType, str],
675 dataId: Optional[DataId] = None,
676 *,
677 collections: Any = None,
678 timespan: Optional[Timespan] = None,
679 **kwargs: Any,
680 ) -> Optional[DatasetRef]:
681 """Find a dataset given its `DatasetType` and data ID.
683 This can be used to obtain a `DatasetRef` that permits the dataset to
684 be read from a `Datastore`. If the dataset is a component and can not
685 be found using the provided dataset type, a dataset ref for the parent
686 will be returned instead but with the correct dataset type.
688 Parameters
689 ----------
690 datasetType : `DatasetType` or `str`
691 A `DatasetType` or the name of one. If this is a `DatasetType`
692 instance, its storage class will be respected and propagated to
693 the output, even if it differs from the dataset type definition
694 in the registry, as long as the storage classes are convertible.
695 dataId : `dict` or `DataCoordinate`, optional
696 A `dict`-like object containing the `Dimension` links that identify
697 the dataset within a collection.
698 collections, optional.
699 An expression that fully or partially identifies the collections to
700 search for the dataset; see
701 :ref:`daf_butler_collection_expressions` for more information.
702 Defaults to ``self.defaults.collections``.
703 timespan : `Timespan`, optional
704 A timespan that the validity range of the dataset must overlap.
705 If not provided, any `~CollectionType.CALIBRATION` collections
706 matched by the ``collections`` argument will not be searched.
707 **kwargs
708 Additional keyword arguments passed to
709 `DataCoordinate.standardize` to convert ``dataId`` to a true
710 `DataCoordinate` or augment an existing one.
712 Returns
713 -------
714 ref : `DatasetRef`
715 A reference to the dataset, or `None` if no matching Dataset
716 was found.
718 Raises
719 ------
720 NoDefaultCollectionError
721 Raised if ``collections`` is `None` and
722 ``self.defaults.collections`` is `None`.
723 LookupError
724 Raised if one or more data ID keys are missing.
725 MissingDatasetTypeError
726 Raised if the dataset type does not exist.
727 MissingCollectionError
728 Raised if any of ``collections`` does not exist in the registry.
730 Notes
731 -----
732 This method simply returns `None` and does not raise an exception even
733 when the set of collections searched is intrinsically incompatible with
734 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
735 only `~CollectionType.CALIBRATION` collections are being searched.
736 This may make it harder to debug some lookup failures, but the behavior
737 is intentional; we consider it more important that failed searches are
738 reported consistently, regardless of the reason, and that adding
739 additional collections that do not contain a match to the search path
740 never changes the behavior.
742 This method handles component dataset types automatically, though most
743 other registry operations do not.
744 """
745 raise NotImplementedError()
747 @abstractmethod
748 def insertDatasets(
749 self,
750 datasetType: Union[DatasetType, str],
751 dataIds: Iterable[DataId],
752 run: Optional[str] = None,
753 expand: bool = True,
754 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
755 ) -> List[DatasetRef]:
756 """Insert one or more datasets into the `Registry`
758 This always adds new datasets; to associate existing datasets with
759 a new collection, use ``associate``.
761 Parameters
762 ----------
763 datasetType : `DatasetType` or `str`
764 A `DatasetType` or the name of one.
765 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
766 Dimension-based identifiers for the new datasets.
767 run : `str`, optional
768 The name of the run that produced the datasets. Defaults to
769 ``self.defaults.run``.
770 expand : `bool`, optional
771 If `True` (default), expand data IDs as they are inserted. This is
772 necessary in general to allow datastore to generate file templates,
773 but it may be disabled if the caller can guarantee this is
774 unnecessary.
775 idGenerationMode : `DatasetIdGenEnum`, optional
776 Specifies option for generating dataset IDs. By default unique IDs
777 are generated for each inserted dataset.
779 Returns
780 -------
781 refs : `list` of `DatasetRef`
782 Resolved `DatasetRef` instances for all given data IDs (in the same
783 order).
785 Raises
786 ------
787 DatasetTypeError
788 Raised if ``datasetType`` is not known to registry.
789 CollectionTypeError
790 Raised if ``run`` collection type is not `~CollectionType.RUN`.
791 NoDefaultCollectionError
792 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
793 ConflictingDefinitionError
794 If a dataset with the same dataset type and data ID as one of those
795 given already exists in ``run``.
796 MissingCollectionError
797 Raised if ``run`` does not exist in the registry.
798 """
799 raise NotImplementedError()
801 @abstractmethod
802 def _importDatasets(
803 self,
804 datasets: Iterable[DatasetRef],
805 expand: bool = True,
806 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
807 reuseIds: bool = False,
808 ) -> List[DatasetRef]:
809 """Import one or more datasets into the `Registry`.
811 Difference from `insertDatasets` method is that this method accepts
812 `DatasetRef` instances which should already be resolved and have a
813 dataset ID. If registry supports globally-unique dataset IDs (e.g.
814 `uuid.UUID`) then datasets which already exist in the registry will be
815 ignored if imported again.
817 Parameters
818 ----------
819 datasets : `~collections.abc.Iterable` of `DatasetRef`
820 Datasets to be inserted. All `DatasetRef` instances must have
821 identical ``datasetType`` and ``run`` attributes. ``run``
822 attribute can be `None` and defaults to ``self.defaults.run``.
823 Datasets can specify ``id`` attribute which will be used for
824 inserted datasets. All dataset IDs must have the same type
825 (`int` or `uuid.UUID`), if type of dataset IDs does not match
826 configured backend then IDs will be ignored and new IDs will be
827 generated by backend.
828 expand : `bool`, optional
829 If `True` (default), expand data IDs as they are inserted. This is
830 necessary in general to allow datastore to generate file templates,
831 but it may be disabled if the caller can guarantee this is
832 unnecessary.
833 idGenerationMode : `DatasetIdGenEnum`, optional
834 Specifies option for generating dataset IDs when IDs are not
835 provided or their type does not match backend type. By default
836 unique IDs are generated for each inserted dataset.
837 reuseIds : `bool`, optional
838 If `True` then forces re-use of imported dataset IDs for integer
839 IDs which are normally generated as auto-incremented; exception
840 will be raised if imported IDs clash with existing ones. This
841 option has no effect on the use of globally-unique IDs which are
842 always re-used (or generated if integer IDs are being imported).
844 Returns
845 -------
846 refs : `list` of `DatasetRef`
847 Resolved `DatasetRef` instances for all given data IDs (in the same
848 order). If any of ``datasets`` has an ID which already exists in
849 the database then it will not be inserted or updated, but a
850 resolved `DatasetRef` will be returned for it in any case.
852 Raises
853 ------
854 NoDefaultCollectionError
855 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
856 DatasetTypeError
857 Raised if datasets correspond to more than one dataset type or
858 dataset type is not known to registry.
859 ConflictingDefinitionError
860 If a dataset with the same dataset type and data ID as one of those
861 given already exists in ``run``.
862 MissingCollectionError
863 Raised if ``run`` does not exist in the registry.
865 Notes
866 -----
867 This method is considered package-private and internal to Butler
868 implementation. Clients outside daf_butler package should not use this
869 method.
870 """
871 raise NotImplementedError()
873 @abstractmethod
874 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
875 """Retrieve a Dataset entry.
877 Parameters
878 ----------
879 id : `DatasetId`
880 The unique identifier for the dataset.
882 Returns
883 -------
884 ref : `DatasetRef` or `None`
885 A ref to the Dataset, or `None` if no matching Dataset
886 was found.
887 """
888 raise NotImplementedError()
890 @abstractmethod
891 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
892 """Remove datasets from the Registry.
894 The datasets will be removed unconditionally from all collections, and
895 any `Quantum` that consumed this dataset will instead be marked with
896 having a NULL input. `Datastore` records will *not* be deleted; the
897 caller is responsible for ensuring that the dataset has already been
898 removed from all Datastores.
900 Parameters
901 ----------
902 refs : `Iterable` of `DatasetRef`
903 References to the datasets to be removed. Must include a valid
904 ``id`` attribute, and should be considered invalidated upon return.
906 Raises
907 ------
908 AmbiguousDatasetError
909 Raised if any ``ref.id`` is `None`.
910 OrphanedRecordError
911 Raised if any dataset is still present in any `Datastore`.
912 """
913 raise NotImplementedError()
915 @abstractmethod
916 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
917 """Add existing datasets to a `~CollectionType.TAGGED` collection.
919 If a DatasetRef with the same exact ID is already in a collection
920 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
921 data ID but with different ID exists in the collection,
922 `ConflictingDefinitionError` is raised.
924 Parameters
925 ----------
926 collection : `str`
927 Indicates the collection the datasets should be associated with.
928 refs : `Iterable` [ `DatasetRef` ]
929 An iterable of resolved `DatasetRef` instances that already exist
930 in this `Registry`.
932 Raises
933 ------
934 ConflictingDefinitionError
935 If a Dataset with the given `DatasetRef` already exists in the
936 given collection.
937 AmbiguousDatasetError
938 Raised if ``any(ref.id is None for ref in refs)``.
939 MissingCollectionError
940 Raised if ``collection`` does not exist in the registry.
941 CollectionTypeError
942 Raise adding new datasets to the given ``collection`` is not
943 allowed.
944 """
945 raise NotImplementedError()
947 @abstractmethod
948 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
949 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
951 ``collection`` and ``ref`` combinations that are not currently
952 associated are silently ignored.
954 Parameters
955 ----------
956 collection : `str`
957 The collection the datasets should no longer be associated with.
958 refs : `Iterable` [ `DatasetRef` ]
959 An iterable of resolved `DatasetRef` instances that already exist
960 in this `Registry`.
962 Raises
963 ------
964 AmbiguousDatasetError
965 Raised if any of the given dataset references is unresolved.
966 MissingCollectionError
967 Raised if ``collection`` does not exist in the registry.
968 CollectionTypeError
969 Raise adding new datasets to the given ``collection`` is not
970 allowed.
971 """
972 raise NotImplementedError()
974 @abstractmethod
975 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
976 """Associate one or more datasets with a calibration collection and a
977 validity range within it.
979 Parameters
980 ----------
981 collection : `str`
982 The name of an already-registered `~CollectionType.CALIBRATION`
983 collection.
984 refs : `Iterable` [ `DatasetRef` ]
985 Datasets to be associated.
986 timespan : `Timespan`
987 The validity range for these datasets within the collection.
989 Raises
990 ------
991 AmbiguousDatasetError
992 Raised if any of the given `DatasetRef` instances is unresolved.
993 ConflictingDefinitionError
994 Raised if the collection already contains a different dataset with
995 the same `DatasetType` and data ID and an overlapping validity
996 range.
997 CollectionTypeError
998 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
999 collection or if one or more datasets are of a dataset type for
1000 which `DatasetType.isCalibration` returns `False`.
1001 """
1002 raise NotImplementedError()
1004 @abstractmethod
1005 def decertify(
1006 self,
1007 collection: str,
1008 datasetType: Union[str, DatasetType],
1009 timespan: Timespan,
1010 *,
1011 dataIds: Optional[Iterable[DataId]] = None,
1012 ) -> None:
1013 """Remove or adjust datasets to clear a validity range within a
1014 calibration collection.
1016 Parameters
1017 ----------
1018 collection : `str`
1019 The name of an already-registered `~CollectionType.CALIBRATION`
1020 collection.
1021 datasetType : `str` or `DatasetType`
1022 Name or `DatasetType` instance for the datasets to be decertified.
1023 timespan : `Timespan`, optional
1024 The validity range to remove datasets from within the collection.
1025 Datasets that overlap this range but are not contained by it will
1026 have their validity ranges adjusted to not overlap it, which may
1027 split a single dataset validity range into two.
1028 dataIds : `Iterable` [ `DataId` ], optional
1029 Data IDs that should be decertified within the given validity range
1030 If `None`, all data IDs for ``self.datasetType`` will be
1031 decertified.
1033 Raises
1034 ------
1035 CollectionTypeError
1036 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1037 collection or if ``datasetType.isCalibration() is False``.
1038 """
1039 raise NotImplementedError()
1041 @abstractmethod
1042 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1043 """Return an object that allows a new `Datastore` instance to
1044 communicate with this `Registry`.
1046 Returns
1047 -------
1048 manager : `DatastoreRegistryBridgeManager`
1049 Object that mediates communication between this `Registry` and its
1050 associated datastores.
1051 """
1052 raise NotImplementedError()
1054 @abstractmethod
1055 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1056 """Retrieve datastore locations for a given dataset.
1058 Parameters
1059 ----------
1060 ref : `DatasetRef`
1061 A reference to the dataset for which to retrieve storage
1062 information.
1064 Returns
1065 -------
1066 datastores : `Iterable` [ `str` ]
1067 All the matching datastores holding this dataset.
1069 Raises
1070 ------
1071 AmbiguousDatasetError
1072 Raised if ``ref.id`` is `None`.
1073 """
1074 raise NotImplementedError()
1076 @abstractmethod
1077 def expandDataId(
1078 self,
1079 dataId: Optional[DataId] = None,
1080 *,
1081 graph: Optional[DimensionGraph] = None,
1082 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
1083 withDefaults: bool = True,
1084 **kwargs: Any,
1085 ) -> DataCoordinate:
1086 """Expand a dimension-based data ID to include additional information.
1088 Parameters
1089 ----------
1090 dataId : `DataCoordinate` or `dict`, optional
1091 Data ID to be expanded; augmented and overridden by ``kwargs``.
1092 graph : `DimensionGraph`, optional
1093 Set of dimensions for the expanded ID. If `None`, the dimensions
1094 will be inferred from the keys of ``dataId`` and ``kwargs``.
1095 Dimensions that are in ``dataId`` or ``kwargs`` but not in
1096 ``graph`` are silently ignored, providing a way to extract and
1097 ``graph`` expand a subset of a data ID.
1098 records : `Mapping` [`str`, `DimensionRecord`], optional
1099 Dimension record data to use before querying the database for that
1100 data, keyed by element name.
1101 withDefaults : `bool`, optional
1102 Utilize ``self.defaults.dataId`` to fill in missing governor
1103 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1104 used).
1105 **kwargs
1106 Additional keywords are treated like additional key-value pairs for
1107 ``dataId``, extending and overriding
1109 Returns
1110 -------
1111 expanded : `DataCoordinate`
1112 A data ID that includes full metadata for all of the dimensions it
1113 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1114 ``expanded.hasFull()`` both return `True`.
1116 Raises
1117 ------
1118 DataIdError
1119 Raised when ``dataId`` or keyword arguments specify unknown
1120 dimensions or values, or when a resulting data ID contains
1121 contradictory key-value pairs, according to dimension
1122 relationships.
1124 Notes
1125 -----
1126 This method cannot be relied upon to reject invalid data ID values
1127 for dimensions that do actually not have any record columns. For
1128 efficiency reasons the records for these dimensions (which have only
1129 dimension key values that are given by the caller) may be constructed
1130 directly rather than obtained from the registry database.
1131 """
1132 raise NotImplementedError()
1134 @abstractmethod
1135 def insertDimensionData(
1136 self,
1137 element: Union[DimensionElement, str],
1138 *data: Union[Mapping[str, Any], DimensionRecord],
1139 conform: bool = True,
1140 replace: bool = False,
1141 skip_existing: bool = False,
1142 ) -> None:
1143 """Insert one or more dimension records into the database.
1145 Parameters
1146 ----------
1147 element : `DimensionElement` or `str`
1148 The `DimensionElement` or name thereof that identifies the table
1149 records will be inserted into.
1150 data : `dict` or `DimensionRecord` (variadic)
1151 One or more records to insert.
1152 conform : `bool`, optional
1153 If `False` (`True` is default) perform no checking or conversions,
1154 and assume that ``element`` is a `DimensionElement` instance and
1155 ``data`` is a one or more `DimensionRecord` instances of the
1156 appropriate subclass.
1157 replace : `bool`, optional
1158 If `True` (`False` is default), replace existing records in the
1159 database if there is a conflict.
1160 skip_existing : `bool`, optional
1161 If `True` (`False` is default), skip insertion if a record with
1162 the same primary key values already exists. Unlike
1163 `syncDimensionData`, this will not detect when the given record
1164 differs from what is in the database, and should not be used when
1165 this is a concern.
1166 """
1167 raise NotImplementedError()
1169 @abstractmethod
1170 def syncDimensionData(
1171 self,
1172 element: Union[DimensionElement, str],
1173 row: Union[Mapping[str, Any], DimensionRecord],
1174 conform: bool = True,
1175 update: bool = False,
1176 ) -> Union[bool, Dict[str, Any]]:
1177 """Synchronize the given dimension record with the database, inserting
1178 if it does not already exist and comparing values if it does.
1180 Parameters
1181 ----------
1182 element : `DimensionElement` or `str`
1183 The `DimensionElement` or name thereof that identifies the table
1184 records will be inserted into.
1185 row : `dict` or `DimensionRecord`
1186 The record to insert.
1187 conform : `bool`, optional
1188 If `False` (`True` is default) perform no checking or conversions,
1189 and assume that ``element`` is a `DimensionElement` instance and
1190 ``data`` is a one or more `DimensionRecord` instances of the
1191 appropriate subclass.
1192 update: `bool`, optional
1193 If `True` (`False` is default), update the existing record in the
1194 database if there is a conflict.
1196 Returns
1197 -------
1198 inserted_or_updated : `bool` or `dict`
1199 `True` if a new row was inserted, `False` if no changes were
1200 needed, or a `dict` mapping updated column names to their old
1201 values if an update was performed (only possible if
1202 ``update=True``).
1204 Raises
1205 ------
1206 ConflictingDefinitionError
1207 Raised if the record exists in the database (according to primary
1208 key lookup) but is inconsistent with the given one.
1209 """
1210 raise NotImplementedError()
1212 @abstractmethod
1213 def queryDatasetTypes(
1214 self,
1215 expression: Any = ...,
1216 *,
1217 components: Optional[bool] = None,
1218 missing: Optional[List[str]] = None,
1219 ) -> Iterable[DatasetType]:
1220 """Iterate over the dataset types whose names match an expression.
1222 Parameters
1223 ----------
1224 expression : `Any`, optional
1225 An expression that fully or partially identifies the dataset types
1226 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1227 ``...`` can be used to return all dataset types, and is the
1228 default. See :ref:`daf_butler_dataset_type_expressions` for more
1229 information.
1230 components : `bool`, optional
1231 If `True`, apply all expression patterns to component dataset type
1232 names as well. If `False`, never apply patterns to components.
1233 If `None` (default), apply patterns to components only if their
1234 parent datasets were not matched by the expression.
1235 Fully-specified component datasets (`str` or `DatasetType`
1236 instances) are always included.
1238 Values other than `False` are deprecated, and only `False` will be
1239 supported after v26. After v27 this argument will be removed
1240 entirely.
1241 missing : `list` of `str`, optional
1242 String dataset type names that were explicitly given (i.e. not
1243 regular expression patterns) but not found will be appended to this
1244 list, if it is provided.
1246 Returns
1247 -------
1248 dataset_types : `Iterable` [ `DatasetType`]
1249 An `Iterable` of `DatasetType` instances whose names match
1250 ``expression``.
1252 Raises
1253 ------
1254 DatasetTypeExpressionError
1255 Raised when ``expression`` is invalid.
1256 """
1257 raise NotImplementedError()
1259 @abstractmethod
1260 def queryCollections(
1261 self,
1262 expression: Any = ...,
1263 datasetType: Optional[DatasetType] = None,
1264 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
1265 flattenChains: bool = False,
1266 includeChains: Optional[bool] = None,
1267 ) -> Sequence[str]:
1268 """Iterate over the collections whose names match an expression.
1270 Parameters
1271 ----------
1272 expression : `Any`, optional
1273 An expression that identifies the collections to return, such as
1274 a `str` (for full matches or partial matches via globs),
1275 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1276 can be used to return all collections, and is the default.
1277 See :ref:`daf_butler_collection_expressions` for more information.
1278 datasetType : `DatasetType`, optional
1279 If provided, only yield collections that may contain datasets of
1280 this type. This is a conservative approximation in general; it may
1281 yield collections that do not have any such datasets.
1282 collectionTypes : `AbstractSet` [ `CollectionType` ] or \
1283 `CollectionType`, optional
1284 If provided, only yield collections of these types.
1285 flattenChains : `bool`, optional
1286 If `True` (`False` is default), recursively yield the child
1287 collections of matching `~CollectionType.CHAINED` collections.
1288 includeChains : `bool`, optional
1289 If `True`, yield records for matching `~CollectionType.CHAINED`
1290 collections. Default is the opposite of ``flattenChains``: include
1291 either CHAINED collections or their children, but not both.
1293 Returns
1294 -------
1295 collections : `Sequence` [ `str` ]
1296 The names of collections that match ``expression``.
1298 Raises
1299 ------
1300 CollectionExpressionError
1301 Raised when ``expression`` is invalid.
1303 Notes
1304 -----
1305 The order in which collections are returned is unspecified, except that
1306 the children of a `~CollectionType.CHAINED` collection are guaranteed
1307 to be in the order in which they are searched. When multiple parent
1308 `~CollectionType.CHAINED` collections match the same criteria, the
1309 order in which the two lists appear is unspecified, and the lists of
1310 children may be incomplete if a child has multiple parents.
1311 """
1312 raise NotImplementedError()
1314 @abstractmethod
1315 def queryDatasets(
1316 self,
1317 datasetType: Any,
1318 *,
1319 collections: Any = None,
1320 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1321 dataId: Optional[DataId] = None,
1322 where: str = "",
1323 findFirst: bool = False,
1324 components: Optional[bool] = None,
1325 bind: Optional[Mapping[str, Any]] = None,
1326 check: bool = True,
1327 **kwargs: Any,
1328 ) -> DatasetQueryResults:
1329 """Query for and iterate over dataset references matching user-provided
1330 criteria.
1332 Parameters
1333 ----------
1334 datasetType
1335 An expression that fully or partially identifies the dataset types
1336 to be queried. Allowed types include `DatasetType`, `str`,
1337 `re.Pattern`, and iterables thereof. The special value ``...`` can
1338 be used to query all dataset types. See
1339 :ref:`daf_butler_dataset_type_expressions` for more information.
1340 collections: optional
1341 An expression that identifies the collections to search, such as a
1342 `str` (for full matches or partial matches via globs), `re.Pattern`
1343 (for partial matches), or iterable thereof. ``...`` can be used to
1344 search all collections (actually just all `~CollectionType.RUN`
1345 collections, because this will still find all datasets).
1346 If not provided, ``self.default.collections`` is used. See
1347 :ref:`daf_butler_collection_expressions` for more information.
1348 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1349 Dimensions to include in the query (in addition to those used
1350 to identify the queried dataset type(s)), either to constrain
1351 the resulting datasets to those for which a matching dimension
1352 exists, or to relate the dataset type's dimensions to dimensions
1353 referenced by the ``dataId`` or ``where`` arguments.
1354 dataId : `dict` or `DataCoordinate`, optional
1355 A data ID whose key-value pairs are used as equality constraints
1356 in the query.
1357 where : `str`, optional
1358 A string expression similar to a SQL WHERE clause. May involve
1359 any column of a dimension table or (as a shortcut for the primary
1360 key column of a dimension table) dimension name. See
1361 :ref:`daf_butler_dimension_expressions` for more information.
1362 findFirst : `bool`, optional
1363 If `True` (`False` is default), for each result data ID, only
1364 yield one `DatasetRef` of each `DatasetType`, from the first
1365 collection in which a dataset of that dataset type appears
1366 (according to the order of ``collections`` passed in). If `True`,
1367 ``collections`` must not contain regular expressions and may not
1368 be ``...``.
1369 components : `bool`, optional
1370 If `True`, apply all dataset expression patterns to component
1371 dataset type names as well. If `False`, never apply patterns to
1372 components. If `None` (default), apply patterns to components only
1373 if their parent datasets were not matched by the expression.
1374 Fully-specified component datasets (`str` or `DatasetType`
1375 instances) are always included.
1377 Values other than `False` are deprecated, and only `False` will be
1378 supported after v26. After v27 this argument will be removed
1379 entirely.
1380 bind : `Mapping`, optional
1381 Mapping containing literal values that should be injected into the
1382 ``where`` expression, keyed by the identifiers they replace.
1383 check : `bool`, optional
1384 If `True` (default) check the query for consistency before
1385 executing it. This may reject some valid queries that resemble
1386 common mistakes (e.g. queries for visits without specifying an
1387 instrument).
1388 **kwargs
1389 Additional keyword arguments are forwarded to
1390 `DataCoordinate.standardize` when processing the ``dataId``
1391 argument (and may be used to provide a constraining data ID even
1392 when the ``dataId`` argument is `None`).
1394 Returns
1395 -------
1396 refs : `queries.DatasetQueryResults`
1397 Dataset references matching the given query criteria. Nested data
1398 IDs are guaranteed to include values for all implied dimensions
1399 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1400 include dimension records (`DataCoordinate.hasRecords` will be
1401 `False`) unless `~queries.DatasetQueryResults.expanded` is called
1402 on the result object (which returns a new one).
1404 Raises
1405 ------
1406 DatasetTypeExpressionError
1407 Raised when ``datasetType`` expression is invalid.
1408 TypeError
1409 Raised when the arguments are incompatible, such as when a
1410 collection wildcard is passed when ``findFirst`` is `True`, or
1411 when ``collections`` is `None` and``self.defaults.collections`` is
1412 also `None`.
1413 DataIdError
1414 Raised when ``dataId`` or keyword arguments specify unknown
1415 dimensions or values, or when they contain inconsistent values.
1416 UserExpressionError
1417 Raised when ``where`` expression is invalid.
1419 Notes
1420 -----
1421 When multiple dataset types are queried in a single call, the
1422 results of this operation are equivalent to querying for each dataset
1423 type separately in turn, and no information about the relationships
1424 between datasets of different types is included. In contexts where
1425 that kind of information is important, the recommended pattern is to
1426 use `queryDataIds` to first obtain data IDs (possibly with the
1427 desired dataset types and collections passed as constraints to the
1428 query), and then use multiple (generally much simpler) calls to
1429 `queryDatasets` with the returned data IDs passed as constraints.
1430 """
1431 raise NotImplementedError()
1433 @abstractmethod
1434 def queryDataIds(
1435 self,
1436 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1437 *,
1438 dataId: Optional[DataId] = None,
1439 datasets: Any = None,
1440 collections: Any = None,
1441 where: str = "",
1442 components: Optional[bool] = None,
1443 bind: Optional[Mapping[str, Any]] = None,
1444 check: bool = True,
1445 **kwargs: Any,
1446 ) -> DataCoordinateQueryResults:
1447 """Query for data IDs matching user-provided criteria.
1449 Parameters
1450 ----------
1451 dimensions : `Dimension` or `str`, or iterable thereof
1452 The dimensions of the data IDs to yield, as either `Dimension`
1453 instances or `str`. Will be automatically expanded to a complete
1454 `DimensionGraph`.
1455 dataId : `dict` or `DataCoordinate`, optional
1456 A data ID whose key-value pairs are used as equality constraints
1457 in the query.
1458 datasets : `Any`, optional
1459 An expression that fully or partially identifies dataset types
1460 that should constrain the yielded data IDs. For example, including
1461 "raw" here would constrain the yielded ``instrument``,
1462 ``exposure``, ``detector``, and ``physical_filter`` values to only
1463 those for which at least one "raw" dataset exists in
1464 ``collections``. Allowed types include `DatasetType`, `str`,
1465 and iterables thereof. Regular expression objects (i.e.
1466 `re.Pattern`) are deprecated and will be removed after the v26
1467 release. See :ref:`daf_butler_dataset_type_expressions` for more
1468 information.
1469 collections: `Any`, optional
1470 An expression that identifies the collections to search for
1471 datasets, such as a `str` (for full matches or partial matches
1472 via globs), `re.Pattern` (for partial matches), or iterable
1473 thereof. ``...`` can be used to search all collections (actually
1474 just all `~CollectionType.RUN` collections, because this will
1475 still find all datasets). If not provided,
1476 ``self.default.collections`` is used. Ignored unless ``datasets``
1477 is also passed. See :ref:`daf_butler_collection_expressions` for
1478 more information.
1479 where : `str`, optional
1480 A string expression similar to a SQL WHERE clause. May involve
1481 any column of a dimension table or (as a shortcut for the primary
1482 key column of a dimension table) dimension name. See
1483 :ref:`daf_butler_dimension_expressions` for more information.
1484 components : `bool`, optional
1485 If `True`, apply all dataset expression patterns to component
1486 dataset type names as well. If `False`, never apply patterns to
1487 components. If `None` (default), apply patterns to components only
1488 if their parent datasets were not matched by the expression.
1489 Fully-specified component datasets (`str` or `DatasetType`
1490 instances) are always included.
1492 Values other than `False` are deprecated, and only `False` will be
1493 supported after v26. After v27 this argument will be removed
1494 entirely.
1495 bind : `Mapping`, optional
1496 Mapping containing literal values that should be injected into the
1497 ``where`` expression, keyed by the identifiers they replace.
1498 check : `bool`, optional
1499 If `True` (default) check the query for consistency before
1500 executing it. This may reject some valid queries that resemble
1501 common mistakes (e.g. queries for visits without specifying an
1502 instrument).
1503 **kwargs
1504 Additional keyword arguments are forwarded to
1505 `DataCoordinate.standardize` when processing the ``dataId``
1506 argument (and may be used to provide a constraining data ID even
1507 when the ``dataId`` argument is `None`).
1509 Returns
1510 -------
1511 dataIds : `queries.DataCoordinateQueryResults`
1512 Data IDs matching the given query parameters. These are guaranteed
1513 to identify all dimensions (`DataCoordinate.hasFull` returns
1514 `True`), but will not contain `DimensionRecord` objects
1515 (`DataCoordinate.hasRecords` returns `False`). Call
1516 `DataCoordinateQueryResults.expanded` on the returned object to
1517 fetch those (and consider using
1518 `DataCoordinateQueryResults.materialize` on the returned object
1519 first if the expected number of rows is very large). See
1520 documentation for those methods for additional information.
1522 Raises
1523 ------
1524 NoDefaultCollectionError
1525 Raised if ``collections`` is `None` and
1526 ``self.defaults.collections`` is `None`.
1527 CollectionExpressionError
1528 Raised when ``collections`` expression is invalid.
1529 DataIdError
1530 Raised when ``dataId`` or keyword arguments specify unknown
1531 dimensions or values, or when they contain inconsistent values.
1532 DatasetTypeExpressionError
1533 Raised when ``datasetType`` expression is invalid.
1534 UserExpressionError
1535 Raised when ``where`` expression is invalid.
1536 """
1537 raise NotImplementedError()
1539 @abstractmethod
1540 def queryDimensionRecords(
1541 self,
1542 element: Union[DimensionElement, str],
1543 *,
1544 dataId: Optional[DataId] = None,
1545 datasets: Any = None,
1546 collections: Any = None,
1547 where: str = "",
1548 components: Optional[bool] = None,
1549 bind: Optional[Mapping[str, Any]] = None,
1550 check: bool = True,
1551 **kwargs: Any,
1552 ) -> DimensionRecordQueryResults:
1553 """Query for dimension information matching user-provided criteria.
1555 Parameters
1556 ----------
1557 element : `DimensionElement` or `str`
1558 The dimension element to obtain records for.
1559 dataId : `dict` or `DataCoordinate`, optional
1560 A data ID whose key-value pairs are used as equality constraints
1561 in the query.
1562 datasets : `Any`, optional
1563 An expression that fully or partially identifies dataset types
1564 that should constrain the yielded records. See `queryDataIds` and
1565 :ref:`daf_butler_dataset_type_expressions` for more information.
1566 collections : `Any`, optional
1567 An expression that identifies the collections to search for
1568 datasets, such as a `str` (for full matches or partial matches
1569 via globs), `re.Pattern` (for partial matches), or iterable
1570 thereof. ``...`` can be used to search all collections (actually
1571 just all `~CollectionType.RUN` collections, because this will
1572 still find all datasets). If not provided,
1573 ``self.default.collections`` is used. Ignored unless ``datasets``
1574 is also passed. See :ref:`daf_butler_collection_expressions` for
1575 more information.
1576 where : `str`, optional
1577 A string expression similar to a SQL WHERE clause. See
1578 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1579 information.
1580 components : `bool`, optional
1581 Whether to apply dataset expressions to components as well.
1582 See `queryDataIds` for more information.
1584 Values other than `False` are deprecated, and only `False` will be
1585 supported after v26. After v27 this argument will be removed
1586 entirely.
1587 bind : `Mapping`, optional
1588 Mapping containing literal values that should be injected into the
1589 ``where`` expression, keyed by the identifiers they replace.
1590 check : `bool`, optional
1591 If `True` (default) check the query for consistency before
1592 executing it. This may reject some valid queries that resemble
1593 common mistakes (e.g. queries for visits without specifying an
1594 instrument).
1595 **kwargs
1596 Additional keyword arguments are forwarded to
1597 `DataCoordinate.standardize` when processing the ``dataId``
1598 argument (and may be used to provide a constraining data ID even
1599 when the ``dataId`` argument is `None`).
1601 Returns
1602 -------
1603 dataIds : `queries.DimensionRecordQueryResults`
1604 Data IDs matching the given query parameters.
1606 Raises
1607 ------
1608 NoDefaultCollectionError
1609 Raised if ``collections`` is `None` and
1610 ``self.defaults.collections`` is `None`.
1611 CollectionExpressionError
1612 Raised when ``collections`` expression is invalid.
1613 DataIdError
1614 Raised when ``dataId`` or keyword arguments specify unknown
1615 dimensions or values, or when they contain inconsistent values.
1616 DatasetTypeExpressionError
1617 Raised when ``datasetType`` expression is invalid.
1618 UserExpressionError
1619 Raised when ``where`` expression is invalid.
1620 """
1621 raise NotImplementedError()
1623 @abstractmethod
1624 def queryDatasetAssociations(
1625 self,
1626 datasetType: Union[str, DatasetType],
1627 collections: Any = ...,
1628 *,
1629 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1630 flattenChains: bool = False,
1631 ) -> Iterator[DatasetAssociation]:
1632 """Iterate over dataset-collection combinations where the dataset is in
1633 the collection.
1635 This method is a temporary placeholder for better support for
1636 association results in `queryDatasets`. It will probably be
1637 removed in the future, and should be avoided in production code
1638 whenever possible.
1640 Parameters
1641 ----------
1642 datasetType : `DatasetType` or `str`
1643 A dataset type object or the name of one.
1644 collections: `Any`, optional
1645 An expression that identifies the collections to search for
1646 datasets, such as a `str` (for full matches or partial matches
1647 via globs), `re.Pattern` (for partial matches), or iterable
1648 thereof. ``...`` can be used to search all collections (actually
1649 just all `~CollectionType.RUN` collections, because this will still
1650 find all datasets). If not provided, ``self.default.collections``
1651 is used. See :ref:`daf_butler_collection_expressions` for more
1652 information.
1653 collectionTypes : `AbstractSet` [ `CollectionType` ], optional
1654 If provided, only yield associations from collections of these
1655 types.
1656 flattenChains : `bool`, optional
1657 If `True` (default) search in the children of
1658 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
1659 collections are ignored.
1661 Yields
1662 ------
1663 association : `.DatasetAssociation`
1664 Object representing the relationship between a single dataset and
1665 a single collection.
1667 Raises
1668 ------
1669 NoDefaultCollectionError
1670 Raised if ``collections`` is `None` and
1671 ``self.defaults.collections`` is `None`.
1672 CollectionExpressionError
1673 Raised when ``collections`` expression is invalid.
1674 """
1675 raise NotImplementedError()
1677 @property
1678 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1679 """ObsCore manager instance for this registry (`ObsCoreTableManager`
1680 or `None`).
1682 ObsCore manager may not be implemented for all registry backend, or
1683 may not be enabled for many repositories.
1684 """
1685 return None
1687 storageClasses: StorageClassFactory
1688 """All storage classes known to the registry (`StorageClassFactory`).
1689 """
1691 datasetIdFactory: DatasetIdFactory
1692 """Factory for dataset IDs."""