Coverage for python/lsst/daf/butler/registry/_registry.py: 78%
148 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Registry",)

import contextlib
import logging
import re
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator, Mapping, Sequence
from types import EllipsisType
from typing import TYPE_CHECKING, Any

from lsst.resources import ResourcePathExpression
from lsst.utils import doImportType

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from ._collection_summary import CollectionSummary
from ._collectionType import CollectionType
from ._config import RegistryConfig
from ._defaults import RegistryDefaults
from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager

_LOG = logging.getLogger(__name__)

# Type alias for `collections` arguments.
CollectionArgType = str | re.Pattern | Iterable[str | re.Pattern] | EllipsisType | CollectionWildcard


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `~lsst.daf.butler.registry.RegistryDefaults` in
    a ``_defaults`` property. No other properties are assumed shared between
    implementations.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    @classmethod
    def forceRegistryConfig(
        cls, config: ButlerConfig | RegistryConfig | Config | str | None
    ) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(
        cls, config: ButlerConfig | RegistryConfig | Config | str | None
    ) -> tuple[type[Registry], RegistryConfig]:
        """Return class to use to instantiate real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
        registry_cls = doImportType(registry_cls_name)
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        if not issubclass(registry_cls, Registry):
            raise TypeError(
                f"Registry class obtained from config {registry_cls_name} is not a Registry class."
            )
        return registry_cls, config

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> Registry:
        """Create registry database and return `Registry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
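
        Examples
        --------
        A minimal sketch; the repository root below is hypothetical, and the
        default registry and dimension configurations are assumed:

        >>> registry = Registry.createFromConfig(butlerRoot="/tmp/example_repo")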
182 """
183 registry_cls, registry_config = cls.determineTrampoline(config)
184 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)
186 @classmethod
187 def fromConfig(
188 cls,
189 config: ButlerConfig | RegistryConfig | Config | str,
190 butlerRoot: ResourcePathExpression | None = None,
191 writeable: bool = True,
192 defaults: RegistryDefaults | None = None,
193 ) -> Registry:
194 """Create `Registry` subclass instance from ``config``.
196 Registry database must be initialized prior to calling this method.
198 Parameters
199 ----------
200 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
201 Registry configuration
202 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
203 Path to the repository root this `Registry` will manage.
204 writeable : `bool`, optional
205 If `True` (default) create a read-write connection to the database.
206 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
207 Default collection search path and/or output `~CollectionType.RUN`
208 collection.
210 Returns
211 -------
212 registry : `Registry` (subclass)
213 A new `Registry` subclass instance.
215 Notes
216 -----
217 This class will determine the concrete `Registry` subclass to
218 use from configuration. Each subclass should implement this method.
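
        Examples
        --------
        A minimal sketch; the configuration path below is hypothetical:

        >>> registry = Registry.fromConfig(
        ...     "/tmp/example_repo/butler.yaml", writeable=False
        ... )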
219 """
220 # The base class implementation should trampoline to the correct
221 # subclass. No implementation should ever use this implementation
222 # directly. If no class is specified, default to the standard
223 # registry.
224 registry_cls, registry_config = cls.determineTrampoline(config)
225 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
227 @abstractmethod
228 def isWriteable(self) -> bool:
229 """Return `True` if this registry allows write operations, and `False`
230 otherwise.
231 """
232 raise NotImplementedError()
234 @abstractmethod
235 def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
236 """Create a new `Registry` backed by the same data repository and
237 connection as this one, but independent defaults.
239 Parameters
240 ----------
241 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
242 Default collections and data ID values for the new registry. If
243 not provided, ``self.defaults`` will be used (but future changes
244 to either registry's defaults will not affect the other).
246 Returns
247 -------
248 copy : `Registry`
249 A new `Registry` instance with its own defaults.
251 Notes
252 -----
253 Because the new registry shares a connection with the original, they
254 also share transaction state (despite the fact that their `transaction`
255 context manager methods do not reflect this), and must be used with
256 care.
257 """
258 raise NotImplementedError()
260 @property
261 @abstractmethod
262 def dimensions(self) -> DimensionUniverse:
263 """Definitions of all dimensions recognized by this `Registry`
264 (`DimensionUniverse`).
265 """
266 raise NotImplementedError()
268 @property
269 def defaults(self) -> RegistryDefaults:
270 """Default collection search path and/or output `~CollectionType.RUN`
271 collection (`~lsst.daf.butler.registry.RegistryDefaults`).
273 This is an immutable struct whose components may not be set
274 individually, but the entire struct can be set by assigning to this
275 property.
276 """
277 return self._defaults
279 @defaults.setter
280 def defaults(self, value: RegistryDefaults) -> None:
281 if value.run is not None:
282 self.registerRun(value.run)
283 value.finish(self)
284 self._defaults = value
286 @abstractmethod
287 def refresh(self) -> None:
288 """Refresh all in-memory state by querying the database.
290 This may be necessary to enable querying for entities added by other
291 registry instances after this one was constructed.
292 """
293 raise NotImplementedError()
295 @contextlib.contextmanager
296 @abstractmethod
297 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
298 """Return a context manager that represents a transaction."""
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset connection pool for registry if relevant.

        This operation can be used to reset connections to servers when
        using registry with fork-based multiprocessing. This method should
        usually be called by the child process immediately
        after the fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether the collection was created by this
            call (`True`) or had already been registered (`False`).

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        lsst.daf.butler.registry.MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: str | None = None) -> bool:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Returns
        -------
        registered : `bool`
            Boolean indicating whether a new run was registered. `False`
            if it already existed.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
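
        Examples
        --------
        A minimal sketch; the run and collection names are hypothetical:

        >>> registry.registerRun("u/example/run", doc="Example outputs.")
        >>> registry.registerCollection("u/example/tagged", CollectionType.TAGGED)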
400 """
401 raise NotImplementedError()
403 @abstractmethod
404 def removeCollection(self, name: str) -> None:
405 """Remove the given collection from the registry.
407 Parameters
408 ----------
409 name : `str`
410 The name of the collection to remove.
412 Raises
413 ------
414 lsst.daf.butler.registry.MissingCollectionError
415 Raised if no collection with the given name exists.
416 sqlalchemy.exc.IntegrityError
417 Raised if the database rows associated with the collection are
418 still referenced by some other table, such as a dataset in a
419 datastore (for `~CollectionType.RUN` collections only) or a
420 `~CollectionType.CHAINED` collection of which this collection is
421 a child.
423 Notes
424 -----
425 If this is a `~CollectionType.RUN` collection, all datasets and quanta
426 in it will removed from the `Registry` database. This requires that
427 those datasets be removed (or at least trashed) from any datastores
428 that hold them first.
430 A collection may not be deleted as long as it is referenced by a
431 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
432 be deleted or redefined first.
433 """
434 raise NotImplementedError()
436 @abstractmethod
437 def getCollectionChain(self, parent: str) -> Sequence[str]:
438 """Return the child collections in a `~CollectionType.CHAINED`
439 collection.
441 Parameters
442 ----------
443 parent : `str`
444 Name of the chained collection. Must have already been added via
445 a call to `Registry.registerCollection`.
447 Returns
448 -------
449 children : `~collections.abc.Sequence` [ `str` ]
450 An ordered sequence of collection names that are searched when the
451 given chained collection is searched.
453 Raises
454 ------
455 lsst.daf.butler.registry.MissingCollectionError
456 Raised if ``parent`` does not exist in the `Registry`.
457 lsst.daf.butler.registry.CollectionTypeError
458 Raised if ``parent`` does not correspond to a
459 `~CollectionType.CHAINED` collection.
460 """
461 raise NotImplementedError()
463 @abstractmethod
464 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
465 """Define or redefine a `~CollectionType.CHAINED` collection.
467 Parameters
468 ----------
469 parent : `str`
470 Name of the chained collection. Must have already been added via
471 a call to `Registry.registerCollection`.
472 children : collection expression
473 An expression defining an ordered search of child collections,
474 generally an iterable of `str`; see
475 :ref:`daf_butler_collection_expressions` for more information.
476 flatten : `bool`, optional
477 If `True` (`False` is default), recursively flatten out any nested
478 `~CollectionType.CHAINED` collections in ``children`` first.
480 Raises
481 ------
482 lsst.daf.butler.registry.MissingCollectionError
483 Raised when any of the given collections do not exist in the
484 `Registry`.
485 lsst.daf.butler.registry.CollectionTypeError
486 Raised if ``parent`` does not correspond to a
487 `~CollectionType.CHAINED` collection.
488 ValueError
489 Raised if the given collections contains a cycle.
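
        Examples
        --------
        A minimal sketch; the collection names are hypothetical, and the
        chained collection must be registered first:

        >>> registry.registerCollection("u/example/chain", CollectionType.CHAINED)
        >>> registry.setCollectionChain(
        ...     "u/example/chain", ["u/example/run", "u/example/tagged"]
        ... )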
490 """
491 raise NotImplementedError()
493 @abstractmethod
494 def getCollectionParentChains(self, collection: str) -> set[str]:
495 """Return the CHAINED collections that directly contain the given one.
497 Parameters
498 ----------
499 name : `str`
500 Name of the collection.
502 Returns
503 -------
504 chains : `set` of `str`
505 Set of `~CollectionType.CHAINED` collection names.
506 """
507 raise NotImplementedError()
509 @abstractmethod
510 def getCollectionDocumentation(self, collection: str) -> str | None:
511 """Retrieve the documentation string for a collection.
513 Parameters
514 ----------
515 name : `str`
516 Name of the collection.
518 Returns
519 -------
520 docs : `str` or `None`
521 Docstring for the collection with the given name.
522 """
523 raise NotImplementedError()
525 @abstractmethod
526 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
527 """Set the documentation string for a collection.
529 Parameters
530 ----------
531 name : `str`
532 Name of the collection.
533 docs : `str` or `None`
534 Docstring for the collection with the given name; will replace any
535 existing docstring. Passing `None` will remove any existing
536 docstring.
537 """
538 raise NotImplementedError()
540 @abstractmethod
541 def getCollectionSummary(self, collection: str) -> CollectionSummary:
542 """Return a summary for the given collection.
544 Parameters
545 ----------
546 collection : `str`
547 Name of the collection for which a summary is to be retrieved.
549 Returns
550 -------
551 summary : `~lsst.daf.butler.registry.CollectionSummary`
552 Summary of the dataset types and governor dimension values in
553 this collection.
554 """
555 raise NotImplementedError()
557 @abstractmethod
558 def registerDatasetType(self, datasetType: DatasetType) -> bool:
559 """Add a new `DatasetType` to the Registry.
561 It is not an error to register the same `DatasetType` twice.
563 Parameters
564 ----------
565 datasetType : `DatasetType`
566 The `DatasetType` to be added.
568 Returns
569 -------
570 inserted : `bool`
571 `True` if ``datasetType`` was inserted, `False` if an identical
572 existing `DatasetType` was found. Note that in either case the
573 DatasetType is guaranteed to be defined in the Registry
574 consistently with the given definition.
576 Raises
577 ------
578 ValueError
579 Raised if the dimensions or storage class are invalid.
580 lsst.daf.butler.registry.ConflictingDefinitionError
581 Raised if this `DatasetType` is already registered with a different
582 definition.
584 Notes
585 -----
586 This method cannot be called within transactions, as it needs to be
587 able to perform its own transaction to be concurrent.
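
        Examples
        --------
        A minimal sketch; the dataset type name, dimensions, and storage
        class below are hypothetical:

        >>> datasetType = DatasetType(
        ...     "example_catalog",
        ...     dimensions=("instrument", "visit"),
        ...     storageClass="SourceCatalog",
        ...     universe=registry.dimensions,
        ... )
        >>> registry.registerDatasetType(datasetType)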
588 """
589 raise NotImplementedError()
591 @abstractmethod
592 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
593 """Remove the named `DatasetType` from the registry.
595 .. warning::
597 Registry implementations can cache the dataset type definitions.
598 This means that deleting the dataset type definition may result in
599 unexpected behavior from other butler processes that are active
600 that have not seen the deletion.
602 Parameters
603 ----------
604 name : `str` or `tuple` [`str`]
605 Name of the type to be removed or tuple containing a list of type
606 names to be removed. Wildcards are allowed.
608 Raises
609 ------
610 lsst.daf.butler.registry.OrphanedRecordError
611 Raised if an attempt is made to remove the dataset type definition
612 when there are already datasets associated with it.
614 Notes
615 -----
616 If the dataset type is not registered the method will return without
617 action.
618 """
619 raise NotImplementedError()
621 @abstractmethod
622 def getDatasetType(self, name: str) -> DatasetType:
623 """Get the `DatasetType`.
625 Parameters
626 ----------
627 name : `str`
628 Name of the type.
630 Returns
631 -------
632 type : `DatasetType`
633 The `DatasetType` associated with the given name.
635 Raises
636 ------
637 lsst.daf.butler.registry.MissingDatasetTypeError
638 Raised if the requested dataset type has not been registered.
640 Notes
641 -----
642 This method handles component dataset types automatically, though most
643 other registry operations do not.
644 """
645 raise NotImplementedError()
647 @abstractmethod
648 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
649 """Test whether the given dataset ID generation mode is supported by
650 `insertDatasets`.
652 Parameters
653 ----------
654 mode : `DatasetIdGenEnum`
655 Enum value for the mode to test.
657 Returns
658 -------
659 supported : `bool`
660 Whether the given mode is supported.
661 """
662 raise NotImplementedError()
664 @abstractmethod
665 def findDataset(
666 self,
667 datasetType: DatasetType | str,
668 dataId: DataId | None = None,
669 *,
670 collections: CollectionArgType | None = None,
671 timespan: Timespan | None = None,
672 **kwargs: Any,
673 ) -> DatasetRef | None:
674 """Find a dataset given its `DatasetType` and data ID.
676 This can be used to obtain a `DatasetRef` that permits the dataset to
677 be read from a `Datastore`. If the dataset is a component and can not
678 be found using the provided dataset type, a dataset ref for the parent
679 will be returned instead but with the correct dataset type.
681 Parameters
682 ----------
683 datasetType : `DatasetType` or `str`
684 A `DatasetType` or the name of one. If this is a `DatasetType`
685 instance, its storage class will be respected and propagated to
686 the output, even if it differs from the dataset type definition
687 in the registry, as long as the storage classes are convertible.
688 dataId : `dict` or `DataCoordinate`, optional
689 A `dict`-like object containing the `Dimension` links that identify
690 the dataset within a collection.
691 collections : collection expression, optional
692 An expression that fully or partially identifies the collections to
693 search for the dataset; see
694 :ref:`daf_butler_collection_expressions` for more information.
695 Defaults to ``self.defaults.collections``.
696 timespan : `Timespan`, optional
697 A timespan that the validity range of the dataset must overlap.
698 If not provided, any `~CollectionType.CALIBRATION` collections
699 matched by the ``collections`` argument will not be searched.
700 **kwargs
701 Additional keyword arguments passed to
702 `DataCoordinate.standardize` to convert ``dataId`` to a true
703 `DataCoordinate` or augment an existing one.
705 Returns
706 -------
707 ref : `DatasetRef`
708 A reference to the dataset, or `None` if no matching Dataset
709 was found.
711 Raises
712 ------
713 lsst.daf.butler.registry.NoDefaultCollectionError
714 Raised if ``collections`` is `None` and
715 ``self.defaults.collections`` is `None`.
716 LookupError
717 Raised if one or more data ID keys are missing.
718 lsst.daf.butler.registry.MissingDatasetTypeError
719 Raised if the dataset type does not exist.
720 lsst.daf.butler.registry.MissingCollectionError
721 Raised if any of ``collections`` does not exist in the registry.
723 Notes
724 -----
725 This method simply returns `None` and does not raise an exception even
726 when the set of collections searched is intrinsically incompatible with
727 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
728 only `~CollectionType.CALIBRATION` collections are being searched.
729 This may make it harder to debug some lookup failures, but the behavior
730 is intentional; we consider it more important that failed searches are
731 reported consistently, regardless of the reason, and that adding
732 additional collections that do not contain a match to the search path
733 never changes the behavior.
735 This method handles component dataset types automatically, though most
736 other registry operations do not.
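
        Examples
        --------
        A minimal sketch; the dataset type name, data ID values, and
        collection name are hypothetical:

        >>> ref = registry.findDataset(
        ...     "example_catalog",
        ...     instrument="ExampleCam",
        ...     visit=42,
        ...     collections="u/example/run",
        ... )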
737 """
738 raise NotImplementedError()
740 @abstractmethod
741 def insertDatasets(
742 self,
743 datasetType: DatasetType | str,
744 dataIds: Iterable[DataId],
745 run: str | None = None,
746 expand: bool = True,
747 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
748 ) -> list[DatasetRef]:
749 """Insert one or more datasets into the `Registry`.
751 This always adds new datasets; to associate existing datasets with
752 a new collection, use ``associate``.
754 Parameters
755 ----------
756 datasetType : `DatasetType` or `str`
757 A `DatasetType` or the name of one.
758 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
759 Dimension-based identifiers for the new datasets.
760 run : `str`, optional
761 The name of the run that produced the datasets. Defaults to
762 ``self.defaults.run``.
763 expand : `bool`, optional
764 If `True` (default), expand data IDs as they are inserted. This is
765 necessary in general to allow datastore to generate file templates,
766 but it may be disabled if the caller can guarantee this is
767 unnecessary.
768 idGenerationMode : `DatasetIdGenEnum`, optional
769 Specifies option for generating dataset IDs. By default unique IDs
770 are generated for each inserted dataset.
772 Returns
773 -------
774 refs : `list` of `DatasetRef`
775 Resolved `DatasetRef` instances for all given data IDs (in the same
776 order).
778 Raises
779 ------
780 lsst.daf.butler.registry.DatasetTypeError
781 Raised if ``datasetType`` is not known to registry.
782 lsst.daf.butler.registry.CollectionTypeError
783 Raised if ``run`` collection type is not `~CollectionType.RUN`.
784 lsst.daf.butler.registry.NoDefaultCollectionError
785 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
786 lsst.daf.butler.registry.ConflictingDefinitionError
787 If a dataset with the same dataset type and data ID as one of those
788 given already exists in ``run``.
789 lsst.daf.butler.registry.MissingCollectionError
790 Raised if ``run`` does not exist in the registry.
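
        Examples
        --------
        A minimal sketch; the dataset type, data ID values, and run name are
        hypothetical:

        >>> (ref,) = registry.insertDatasets(
        ...     "example_catalog",
        ...     [{"instrument": "ExampleCam", "visit": 42}],
        ...     run="u/example/run",
        ... )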
791 """
792 raise NotImplementedError()
794 @abstractmethod
795 def _importDatasets(
796 self,
797 datasets: Iterable[DatasetRef],
798 expand: bool = True,
799 ) -> list[DatasetRef]:
800 """Import one or more datasets into the `Registry`.
802 Difference from `insertDatasets` method is that this method accepts
803 `DatasetRef` instances which should already be resolved and have a
804 dataset ID. If registry supports globally-unique dataset IDs (e.g.
805 `uuid.UUID`) then datasets which already exist in the registry will be
806 ignored if imported again.
808 Parameters
809 ----------
810 datasets : `~collections.abc.Iterable` of `DatasetRef`
811 Datasets to be inserted. All `DatasetRef` instances must have
812 identical ``datasetType`` and ``run`` attributes. ``run``
813 attribute can be `None` and defaults to ``self.defaults.run``.
814 Datasets can specify ``id`` attribute which will be used for
815 inserted datasets. All dataset IDs must have the same type
816 (`int` or `uuid.UUID`), if type of dataset IDs does not match
817 configured backend then IDs will be ignored and new IDs will be
818 generated by backend.
819 expand : `bool`, optional
820 If `True` (default), expand data IDs as they are inserted. This is
821 necessary in general to allow datastore to generate file templates,
822 but it may be disabled if the caller can guarantee this is
823 unnecessary.
825 Returns
826 -------
827 refs : `list` of `DatasetRef`
828 Resolved `DatasetRef` instances for all given data IDs (in the same
829 order). If any of ``datasets`` has an ID which already exists in
830 the database then it will not be inserted or updated, but a
831 resolved `DatasetRef` will be returned for it in any case.
833 Raises
834 ------
835 lsst.daf.butler.registry.NoDefaultCollectionError
836 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
837 lsst.daf.butler.registry.DatasetTypeError
838 Raised if datasets correspond to more than one dataset type or
839 dataset type is not known to registry.
840 lsst.daf.butler.registry.ConflictingDefinitionError
841 If a dataset with the same dataset type and data ID as one of those
842 given already exists in ``run``.
843 lsst.daf.butler.registry.MissingCollectionError
844 Raised if ``run`` does not exist in the registry.
846 Notes
847 -----
848 This method is considered package-private and internal to Butler
849 implementation. Clients outside daf_butler package should not use this
850 method.
851 """
852 raise NotImplementedError()
854 @abstractmethod
855 def getDataset(self, id: DatasetId) -> DatasetRef | None:
856 """Retrieve a Dataset entry.
858 Parameters
859 ----------
860 id : `DatasetId`
861 The unique identifier for the dataset.
863 Returns
864 -------
865 ref : `DatasetRef` or `None`
866 A ref to the Dataset, or `None` if no matching Dataset
867 was found.
868 """
869 raise NotImplementedError()
871 @abstractmethod
872 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
873 """Remove datasets from the Registry.
875 The datasets will be removed unconditionally from all collections, and
876 any `Quantum` that consumed this dataset will instead be marked with
877 having a NULL input. `Datastore` records will *not* be deleted; the
878 caller is responsible for ensuring that the dataset has already been
879 removed from all Datastores.
881 Parameters
882 ----------
883 refs : `~collections.abc.Iterable` [`DatasetRef`]
884 References to the datasets to be removed. Must include a valid
885 ``id`` attribute, and should be considered invalidated upon return.
887 Raises
888 ------
889 lsst.daf.butler.AmbiguousDatasetError
890 Raised if any ``ref.id`` is `None`.
891 lsst.daf.butler.registry.OrphanedRecordError
892 Raised if any dataset is still present in any `Datastore`.
893 """
894 raise NotImplementedError()
896 @abstractmethod
897 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
898 """Add existing datasets to a `~CollectionType.TAGGED` collection.
900 If a DatasetRef with the same exact ID is already in a collection
901 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
902 data ID but with different ID exists in the collection,
903 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised.
905 Parameters
906 ----------
907 collection : `str`
908 Indicates the collection the datasets should be associated with.
909 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
910 An iterable of resolved `DatasetRef` instances that already exist
911 in this `Registry`.
913 Raises
914 ------
915 lsst.daf.butler.registry.ConflictingDefinitionError
916 If a Dataset with the given `DatasetRef` already exists in the
917 given collection.
918 lsst.daf.butler.registry.MissingCollectionError
919 Raised if ``collection`` does not exist in the registry.
920 lsst.daf.butler.registry.CollectionTypeError
921 Raise adding new datasets to the given ``collection`` is not
922 allowed.
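
        Examples
        --------
        A minimal sketch; the collection names are hypothetical, and the
        resolved references come from an earlier query:

        >>> refs = list(
        ...     registry.queryDatasets("example_catalog", collections="u/example/run")
        ... )
        >>> registry.associate("u/example/tagged", refs)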
923 """
924 raise NotImplementedError()
926 @abstractmethod
927 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
928 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
930 ``collection`` and ``ref`` combinations that are not currently
931 associated are silently ignored.
933 Parameters
934 ----------
935 collection : `str`
936 The collection the datasets should no longer be associated with.
937 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
938 An iterable of resolved `DatasetRef` instances that already exist
939 in this `Registry`.
941 Raises
942 ------
943 lsst.daf.butler.AmbiguousDatasetError
944 Raised if any of the given dataset references is unresolved.
945 lsst.daf.butler.registry.MissingCollectionError
946 Raised if ``collection`` does not exist in the registry.
947 lsst.daf.butler.registry.CollectionTypeError
948 Raise adding new datasets to the given ``collection`` is not
949 allowed.
950 """
951 raise NotImplementedError()
953 @abstractmethod
954 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
955 """Associate one or more datasets with a calibration collection and a
956 validity range within it.
958 Parameters
959 ----------
960 collection : `str`
961 The name of an already-registered `~CollectionType.CALIBRATION`
962 collection.
963 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
964 Datasets to be associated.
965 timespan : `Timespan`
966 The validity range for these datasets within the collection.
968 Raises
969 ------
970 lsst.daf.butler.AmbiguousDatasetError
971 Raised if any of the given `DatasetRef` instances is unresolved.
972 lsst.daf.butler.registry.ConflictingDefinitionError
973 Raised if the collection already contains a different dataset with
974 the same `DatasetType` and data ID and an overlapping validity
975 range.
976 lsst.daf.butler.registry.CollectionTypeError
977 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
978 collection or if one or more datasets are of a dataset type for
979 which `DatasetType.isCalibration` returns `False`.
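
        Examples
        --------
        A minimal sketch; the collection name is hypothetical, ``refs`` are
        resolved calibration dataset references obtained elsewhere, and an
        unbounded validity range is used:

        >>> registry.certify("ExampleCam/calib", refs, Timespan(None, None))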
980 """
981 raise NotImplementedError()
983 @abstractmethod
984 def decertify(
985 self,
986 collection: str,
987 datasetType: str | DatasetType,
988 timespan: Timespan,
989 *,
990 dataIds: Iterable[DataId] | None = None,
991 ) -> None:
992 """Remove or adjust datasets to clear a validity range within a
993 calibration collection.
995 Parameters
996 ----------
997 collection : `str`
998 The name of an already-registered `~CollectionType.CALIBRATION`
999 collection.
1000 datasetType : `str` or `DatasetType`
1001 Name or `DatasetType` instance for the datasets to be decertified.
1002 timespan : `Timespan`, optional
1003 The validity range to remove datasets from within the collection.
1004 Datasets that overlap this range but are not contained by it will
1005 have their validity ranges adjusted to not overlap it, which may
1006 split a single dataset validity range into two.
1007 dataIds : iterable [`dict` or `DataCoordinate`], optional
1008 Data IDs that should be decertified within the given validity range
1009 If `None`, all data IDs for ``self.datasetType`` will be
1010 decertified.
1012 Raises
1013 ------
1014 lsst.daf.butler.registry.CollectionTypeError
1015 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1016 collection or if ``datasetType.isCalibration() is False``.
1017 """
1018 raise NotImplementedError()
1020 @abstractmethod
1021 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1022 """Return an object that allows a new `Datastore` instance to
1023 communicate with this `Registry`.
1025 Returns
1026 -------
1027 manager : `~.interfaces.DatastoreRegistryBridgeManager`
1028 Object that mediates communication between this `Registry` and its
1029 associated datastores.
1030 """
1031 raise NotImplementedError()
1033 @abstractmethod
1034 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1035 """Retrieve datastore locations for a given dataset.
1037 Parameters
1038 ----------
1039 ref : `DatasetRef`
1040 A reference to the dataset for which to retrieve storage
1041 information.
1043 Returns
1044 -------
1045 datastores : `~collections.abc.Iterable` [ `str` ]
1046 All the matching datastores holding this dataset.
1048 Raises
1049 ------
1050 lsst.daf.butler.AmbiguousDatasetError
1051 Raised if ``ref.id`` is `None`.
1052 """
1053 raise NotImplementedError()
1055 @abstractmethod
1056 def expandDataId(
1057 self,
1058 dataId: DataId | None = None,
1059 *,
1060 graph: DimensionGraph | None = None,
1061 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
1062 withDefaults: bool = True,
1063 **kwargs: Any,
1064 ) -> DataCoordinate:
1065 """Expand a dimension-based data ID to include additional information.
1067 Parameters
1068 ----------
1069 dataId : `DataCoordinate` or `dict`, optional
1070 Data ID to be expanded; augmented and overridden by ``kwargs``.
1071 graph : `DimensionGraph`, optional
1072 Set of dimensions for the expanded ID. If `None`, the dimensions
1073 will be inferred from the keys of ``dataId`` and ``kwargs``.
1074 Dimensions that are in ``dataId`` or ``kwargs`` but not in
1075 ``graph`` are silently ignored, providing a way to extract and
1076 ``graph`` expand a subset of a data ID.
1077 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \
1078 optional
1079 Dimension record data to use before querying the database for that
1080 data, keyed by element name.
1081 withDefaults : `bool`, optional
1082 Utilize ``self.defaults.dataId`` to fill in missing governor
1083 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1084 used).
1085 **kwargs
1086 Additional keywords are treated like additional key-value pairs for
1087 ``dataId``, extending and overriding
1089 Returns
1090 -------
1091 expanded : `DataCoordinate`
1092 A data ID that includes full metadata for all of the dimensions it
1093 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1094 ``expanded.hasFull()`` both return `True`.
1096 Raises
1097 ------
1098 lsst.daf.butler.registry.DataIdError
1099 Raised when ``dataId`` or keyword arguments specify unknown
1100 dimensions or values, or when a resulting data ID contains
1101 contradictory key-value pairs, according to dimension
1102 relationships.
1104 Notes
1105 -----
1106 This method cannot be relied upon to reject invalid data ID values
1107 for dimensions that do actually not have any record columns. For
1108 efficiency reasons the records for these dimensions (which have only
1109 dimension key values that are given by the caller) may be constructed
1110 directly rather than obtained from the registry database.
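
        Examples
        --------
        A minimal sketch; the data ID values are hypothetical:

        >>> expanded = registry.expandDataId(instrument="ExampleCam", visit=42)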
1111 """
1112 raise NotImplementedError()
1114 @abstractmethod
1115 def insertDimensionData(
1116 self,
1117 element: DimensionElement | str,
1118 *data: Mapping[str, Any] | DimensionRecord,
1119 conform: bool = True,
1120 replace: bool = False,
1121 skip_existing: bool = False,
1122 ) -> None:
1123 """Insert one or more dimension records into the database.
1125 Parameters
1126 ----------
1127 element : `DimensionElement` or `str`
1128 The `DimensionElement` or name thereof that identifies the table
1129 records will be inserted into.
1130 *data : `dict` or `DimensionRecord`
1131 One or more records to insert.
1132 conform : `bool`, optional
1133 If `False` (`True` is default) perform no checking or conversions,
1134 and assume that ``element`` is a `DimensionElement` instance and
1135 ``data`` is a one or more `DimensionRecord` instances of the
1136 appropriate subclass.
1137 replace : `bool`, optional
1138 If `True` (`False` is default), replace existing records in the
1139 database if there is a conflict.
1140 skip_existing : `bool`, optional
1141 If `True` (`False` is default), skip insertion if a record with
1142 the same primary key values already exists. Unlike
1143 `syncDimensionData`, this will not detect when the given record
1144 differs from what is in the database, and should not be used when
1145 this is a concern.
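
        Examples
        --------
        A minimal sketch; the instrument record below is hypothetical:

        >>> registry.insertDimensionData("instrument", {"name": "ExampleCam"})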
1146 """
1147 raise NotImplementedError()
1149 @abstractmethod
1150 def syncDimensionData(
1151 self,
1152 element: DimensionElement | str,
1153 row: Mapping[str, Any] | DimensionRecord,
1154 conform: bool = True,
1155 update: bool = False,
1156 ) -> bool | dict[str, Any]:
1157 """Synchronize the given dimension record with the database, inserting
1158 if it does not already exist and comparing values if it does.
1160 Parameters
1161 ----------
1162 element : `DimensionElement` or `str`
1163 The `DimensionElement` or name thereof that identifies the table
1164 records will be inserted into.
1165 row : `dict` or `DimensionRecord`
1166 The record to insert.
1167 conform : `bool`, optional
1168 If `False` (`True` is default) perform no checking or conversions,
1169 and assume that ``element`` is a `DimensionElement` instance and
1170 ``data`` is a one or more `DimensionRecord` instances of the
1171 appropriate subclass.
1172 update : `bool`, optional
1173 If `True` (`False` is default), update the existing record in the
1174 database if there is a conflict.
1176 Returns
1177 -------
1178 inserted_or_updated : `bool` or `dict`
1179 `True` if a new row was inserted, `False` if no changes were
1180 needed, or a `dict` mapping updated column names to their old
1181 values if an update was performed (only possible if
1182 ``update=True``).
1184 Raises
1185 ------
1186 lsst.daf.butler.registry.ConflictingDefinitionError
1187 Raised if the record exists in the database (according to primary
1188 key lookup) but is inconsistent with the given one.
1189 """
1190 raise NotImplementedError()
1192 @abstractmethod
1193 def queryDatasetTypes(
1194 self,
1195 expression: Any = ...,
1196 *,
1197 components: bool | None = None,
1198 missing: list[str] | None = None,
1199 ) -> Iterable[DatasetType]:
1200 """Iterate over the dataset types whose names match an expression.
1202 Parameters
1203 ----------
1204 expression : dataset type expression, optional
1205 An expression that fully or partially identifies the dataset types
1206 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1207 ``...`` can be used to return all dataset types, and is the
1208 default. See :ref:`daf_butler_dataset_type_expressions` for more
1209 information.
1210 components : `bool`, optional
1211 If `True`, apply all expression patterns to component dataset type
1212 names as well. If `False`, never apply patterns to components.
1213 If `None` (default), apply patterns to components only if their
1214 parent datasets were not matched by the expression.
1215 Fully-specified component datasets (`str` or `DatasetType`
1216 instances) are always included.
1218 Values other than `False` are deprecated, and only `False` will be
1219 supported after v26. After v27 this argument will be removed
1220 entirely.
1221 missing : `list` of `str`, optional
1222 String dataset type names that were explicitly given (i.e. not
1223 regular expression patterns) but not found will be appended to this
1224 list, if it is provided.
1226 Returns
1227 -------
1228 dataset_types : `~collections.abc.Iterable` [ `DatasetType`]
1229 An `~collections.abc.Iterable` of `DatasetType` instances whose
1230 names match ``expression``.
1232 Raises
1233 ------
1234 lsst.daf.butler.registry.DatasetTypeExpressionError
1235 Raised when ``expression`` is invalid.
1236 """
1237 raise NotImplementedError()
1239 @abstractmethod
1240 def queryCollections(
1241 self,
1242 expression: Any = ...,
1243 datasetType: DatasetType | None = None,
1244 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
1245 flattenChains: bool = False,
1246 includeChains: bool | None = None,
1247 ) -> Sequence[str]:
1248 """Iterate over the collections whose names match an expression.
1250 Parameters
1251 ----------
1252 expression : collection expression, optional
1253 An expression that identifies the collections to return, such as
1254 a `str` (for full matches or partial matches via globs),
1255 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1256 can be used to return all collections, and is the default.
1257 See :ref:`daf_butler_collection_expressions` for more information.
1258 datasetType : `DatasetType`, optional
1259 If provided, only yield collections that may contain datasets of
1260 this type. This is a conservative approximation in general; it may
1261 yield collections that do not have any such datasets.
1262 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \
1263 `CollectionType`, optional
1264 If provided, only yield collections of these types.
1265 flattenChains : `bool`, optional
1266 If `True` (`False` is default), recursively yield the child
1267 collections of matching `~CollectionType.CHAINED` collections.
1268 includeChains : `bool`, optional
1269 If `True`, yield records for matching `~CollectionType.CHAINED`
1270 collections. Default is the opposite of ``flattenChains``: include
1271 either CHAINED collections or their children, but not both.
1273 Returns
1274 -------
1275 collections : `~collections.abc.Sequence` [ `str` ]
1276 The names of collections that match ``expression``.
1278 Raises
1279 ------
1280 lsst.daf.butler.registry.CollectionExpressionError
1281 Raised when ``expression`` is invalid.
1283 Notes
1284 -----
1285 The order in which collections are returned is unspecified, except that
1286 the children of a `~CollectionType.CHAINED` collection are guaranteed
1287 to be in the order in which they are searched. When multiple parent
1288 `~CollectionType.CHAINED` collections match the same criteria, the
1289 order in which the two lists appear is unspecified, and the lists of
1290 children may be incomplete if a child has multiple parents.
1291 """
1292 raise NotImplementedError()
1294 @abstractmethod
1295 def queryDatasets(
1296 self,
1297 datasetType: Any,
1298 *,
1299 collections: CollectionArgType | None = None,
1300 dimensions: Iterable[Dimension | str] | None = None,
1301 dataId: DataId | None = None,
1302 where: str = "",
1303 findFirst: bool = False,
1304 components: bool | None = None,
1305 bind: Mapping[str, Any] | None = None,
1306 check: bool = True,
1307 **kwargs: Any,
1308 ) -> DatasetQueryResults:
1309 """Query for and iterate over dataset references matching user-provided
1310 criteria.
1312 Parameters
1313 ----------
1314 datasetType : dataset type expression
1315 An expression that fully or partially identifies the dataset types
1316 to be queried. Allowed types include `DatasetType`, `str`,
1317 `re.Pattern`, and iterables thereof. The special value ``...`` can
1318 be used to query all dataset types. See
1319 :ref:`daf_butler_dataset_type_expressions` for more information.
1320 collections : collection expression, optional
1321 An expression that identifies the collections to search, such as a
1322 `str` (for full matches or partial matches via globs), `re.Pattern`
1323 (for partial matches), or iterable thereof. ``...`` can be used to
1324 search all collections (actually just all `~CollectionType.RUN`
1325 collections, because this will still find all datasets).
1326 If not provided, ``self.default.collections`` is used. See
1327 :ref:`daf_butler_collection_expressions` for more information.
1328 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1329 Dimensions to include in the query (in addition to those used
1330 to identify the queried dataset type(s)), either to constrain
1331 the resulting datasets to those for which a matching dimension
1332 exists, or to relate the dataset type's dimensions to dimensions
1333 referenced by the ``dataId`` or ``where`` arguments.
1334 dataId : `dict` or `DataCoordinate`, optional
1335 A data ID whose key-value pairs are used as equality constraints
1336 in the query.
1337 where : `str`, optional
1338 A string expression similar to a SQL WHERE clause. May involve
1339 any column of a dimension table or (as a shortcut for the primary
1340 key column of a dimension table) dimension name. See
1341 :ref:`daf_butler_dimension_expressions` for more information.
1342 findFirst : `bool`, optional
1343 If `True` (`False` is default), for each result data ID, only
1344 yield one `DatasetRef` of each `DatasetType`, from the first
1345 collection in which a dataset of that dataset type appears
1346 (according to the order of ``collections`` passed in). If `True`,
1347 ``collections`` must not contain regular expressions and may not
1348 be ``...``.
1349 components : `bool`, optional
1350 If `True`, apply all dataset expression patterns to component
1351 dataset type names as well. If `False`, never apply patterns to
1352 components. If `None` (default), apply patterns to components only
1353 if their parent datasets were not matched by the expression.
1354 Fully-specified component datasets (`str` or `DatasetType`
1355 instances) are always included.
1357 Values other than `False` are deprecated, and only `False` will be
1358 supported after v26. After v27 this argument will be removed
1359 entirely.
1360 bind : `~collections.abc.Mapping`, optional
1361 Mapping containing literal values that should be injected into the
1362 ``where`` expression, keyed by the identifiers they replace.
1363 Values of collection type can be expanded in some cases; see
1364 :ref:`daf_butler_dimension_expressions_identifiers` for more
1365 information.
1366 check : `bool`, optional
1367 If `True` (default) check the query for consistency before
1368 executing it. This may reject some valid queries that resemble
1369 common mistakes (e.g. queries for visits without specifying an
1370 instrument).
1371 **kwargs
1372 Additional keyword arguments are forwarded to
1373 `DataCoordinate.standardize` when processing the ``dataId``
1374 argument (and may be used to provide a constraining data ID even
1375 when the ``dataId`` argument is `None`).
1377 Returns
1378 -------
1379 refs : `.queries.DatasetQueryResults`
1380 Dataset references matching the given query criteria. Nested data
1381 IDs are guaranteed to include values for all implied dimensions
1382 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1383 include dimension records (`DataCoordinate.hasRecords` will be
1384 `False`) unless `~.queries.DatasetQueryResults.expanded` is
1385 called on the result object (which returns a new one).
1387 Raises
1388 ------
1389 lsst.daf.butler.registry.DatasetTypeExpressionError
1390 Raised when ``datasetType`` expression is invalid.
1391 TypeError
1392 Raised when the arguments are incompatible, such as when a
1393 collection wildcard is passed when ``findFirst`` is `True`, or
1394 when ``collections`` is `None` and ``self.defaults.collections`` is
1395 also `None`.
1396 lsst.daf.butler.registry.DataIdError
1397 Raised when ``dataId`` or keyword arguments specify unknown
1398 dimensions or values, or when they contain inconsistent values.
1399 lsst.daf.butler.registry.UserExpressionError
1400 Raised when ``where`` expression is invalid.
1402 Notes
1403 -----
1404 When multiple dataset types are queried in a single call, the
1405 results of this operation are equivalent to querying for each dataset
1406 type separately in turn, and no information about the relationships
1407 between datasets of different types is included. In contexts where
1408 that kind of information is important, the recommended pattern is to
1409 use `queryDataIds` to first obtain data IDs (possibly with the
1410 desired dataset types and collections passed as constraints to the
1411 query), and then use multiple (generally much simpler) calls to
1412 `queryDatasets` with the returned data IDs passed as constraints.
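
        Examples
        --------
        A minimal sketch; the dataset type, collection names, and ``where``
        expression are hypothetical:

        >>> refs = list(
        ...     registry.queryDatasets(
        ...         "example_catalog",
        ...         collections=["u/example/run", "u/example/tagged"],
        ...         where="instrument = 'ExampleCam' AND visit > 40",
        ...         findFirst=True,
        ...     )
        ... )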
1413 """
1414 raise NotImplementedError()
1416 @abstractmethod
1417 def queryDataIds(
1418 self,
1419 dimensions: Iterable[Dimension | str] | Dimension | str,
1420 *,
1421 dataId: DataId | None = None,
1422 datasets: Any = None,
1423 collections: CollectionArgType | None = None,
1424 where: str = "",
1425 components: bool | None = None,
1426 bind: Mapping[str, Any] | None = None,
1427 check: bool = True,
1428 **kwargs: Any,
1429 ) -> DataCoordinateQueryResults:
1430 """Query for data IDs matching user-provided criteria.
1432 Parameters
1433 ----------
1434 dimensions : `Dimension` or `str`, or iterable thereof
1435 The dimensions of the data IDs to yield, as either `Dimension`
1436 instances or `str`. Will be automatically expanded to a complete
1437 `DimensionGraph`.
1438 dataId : `dict` or `DataCoordinate`, optional
1439 A data ID whose key-value pairs are used as equality constraints
1440 in the query.
1441 datasets : dataset type expression, optional
1442 An expression that fully or partially identifies dataset types
1443 that should constrain the yielded data IDs. For example, including
1444 "raw" here would constrain the yielded ``instrument``,
1445 ``exposure``, ``detector``, and ``physical_filter`` values to only
1446 those for which at least one "raw" dataset exists in
1447 ``collections``. Allowed types include `DatasetType`, `str`,
1448 and iterables thereof. Regular expression objects (i.e.
1449 `re.Pattern`) are deprecated and will be removed after the v26
1450 release. See :ref:`daf_butler_dataset_type_expressions` for more
1451 information.
1452 collections : collection expression, optional
1453 An expression that identifies the collections to search for
1454 datasets, such as a `str` (for full matches or partial matches
1455 via globs), `re.Pattern` (for partial matches), or iterable
1456 thereof. ``...`` can be used to search all collections (actually
1457 just all `~CollectionType.RUN` collections, because this will
1458 still find all datasets). If not provided,
1459 ``self.default.collections`` is used. Ignored unless ``datasets``
1460 is also passed. See :ref:`daf_butler_collection_expressions` for
1461 more information.
1462 where : `str`, optional
1463 A string expression similar to a SQL WHERE clause. May involve
1464 any column of a dimension table or (as a shortcut for the primary
1465 key column of a dimension table) dimension name. See
1466 :ref:`daf_butler_dimension_expressions` for more information.
1467 components : `bool`, optional
1468 If `True`, apply all dataset expression patterns to component
1469 dataset type names as well. If `False`, never apply patterns to
1470 components. If `None` (default), apply patterns to components only
1471 if their parent datasets were not matched by the expression.
1472 Fully-specified component datasets (`str` or `DatasetType`
1473 instances) are always included.
1475 Values other than `False` are deprecated, and only `False` will be
1476 supported after v26. After v27 this argument will be removed
1477 entirely.
1478 bind : `~collections.abc.Mapping`, optional
1479 Mapping containing literal values that should be injected into the
1480 ``where`` expression, keyed by the identifiers they replace.
1481 Values of collection type can be expanded in some cases; see
1482 :ref:`daf_butler_dimension_expressions_identifiers` for more
1483 information.
1484 check : `bool`, optional
1485 If `True` (default) check the query for consistency before
1486 executing it. This may reject some valid queries that resemble
1487 common mistakes (e.g. queries for visits without specifying an
1488 instrument).
1489 **kwargs
1490 Additional keyword arguments are forwarded to
1491 `DataCoordinate.standardize` when processing the ``dataId``
1492 argument (and may be used to provide a constraining data ID even
1493 when the ``dataId`` argument is `None`).
1495 Returns
1496 -------
1497 dataIds : `.queries.DataCoordinateQueryResults`
1498 Data IDs matching the given query parameters. These are guaranteed
1499 to identify all dimensions (`DataCoordinate.hasFull` returns
1500 `True`), but will not contain `DimensionRecord` objects
1501 (`DataCoordinate.hasRecords` returns `False`). Call
1502 `~.queries.DataCoordinateQueryResults.expanded` on the
1503 returned object to fetch those (and consider using
1504 `~.queries.DataCoordinateQueryResults.materialize` on the
1505 returned object first if the expected number of rows is very
1506 large). See documentation for those methods for additional
1507 information.
1509 Raises
1510 ------
1511 lsst.daf.butler.registry.NoDefaultCollectionError
1512 Raised if ``collections`` is `None` and
1513 ``self.defaults.collections`` is `None`.
1514 lsst.daf.butler.registry.CollectionExpressionError
1515 Raised when ``collections`` expression is invalid.
1516 lsst.daf.butler.registry.DataIdError
1517 Raised when ``dataId`` or keyword arguments specify unknown
1518 dimensions or values, or when they contain inconsistent values.
1519 lsst.daf.butler.registry.DatasetTypeExpressionError
1520 Raised when ``datasetType`` expression is invalid.
1521 lsst.daf.butler.registry.UserExpressionError
1522 Raised when ``where`` expression is invalid.
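
        Examples
        --------
        A minimal sketch; the dimensions, dataset type, and collection name
        are hypothetical:

        >>> dataIds = registry.queryDataIds(
        ...     ["instrument", "visit"],
        ...     datasets="example_catalog",
        ...     collections="u/example/run",
        ... )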
1523 """
1524 raise NotImplementedError()
1526 @abstractmethod
1527 def queryDimensionRecords(
1528 self,
1529 element: DimensionElement | str,
1530 *,
1531 dataId: DataId | None = None,
1532 datasets: Any = None,
1533 collections: CollectionArgType | None = None,
1534 where: str = "",
1535 components: bool | None = None,
1536 bind: Mapping[str, Any] | None = None,
1537 check: bool = True,
1538 **kwargs: Any,
1539 ) -> DimensionRecordQueryResults:
1540 """Query for dimension information matching user-provided criteria.
1542 Parameters
1543 ----------
1544 element : `DimensionElement` or `str`
1545 The dimension element to obtain records for.
1546 dataId : `dict` or `DataCoordinate`, optional
1547 A data ID whose key-value pairs are used as equality constraints
1548 in the query.
1549 datasets : dataset type expression, optional
1550 An expression that fully or partially identifies dataset types
1551 that should constrain the yielded records. See `queryDataIds` and
1552 :ref:`daf_butler_dataset_type_expressions` for more information.
1553 collections : collection expression, optional
1554 An expression that identifies the collections to search for
1555 datasets, such as a `str` (for full matches or partial matches
1556 via globs), `re.Pattern` (for partial matches), or iterable
1557 thereof. ``...`` can be used to search all collections (actually
1558 just all `~CollectionType.RUN` collections, because this will
1559 still find all datasets). If not provided,
1560 ``self.default.collections`` is used. Ignored unless ``datasets``
1561 is also passed. See :ref:`daf_butler_collection_expressions` for
1562 more information.
1563 where : `str`, optional
1564 A string expression similar to a SQL WHERE clause. See
1565 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1566 information.
1567 components : `bool`, optional
1568 Whether to apply dataset expressions to components as well.
1569 See `queryDataIds` for more information.
1571 Values other than `False` are deprecated, and only `False` will be
1572 supported after v26. After v27 this argument will be removed
1573 entirely.
1574 bind : `~collections.abc.Mapping`, optional
1575 Mapping containing literal values that should be injected into the
1576 ``where`` expression, keyed by the identifiers they replace.
1577 Values of collection type can be expanded in some cases; see
1578 :ref:`daf_butler_dimension_expressions_identifiers` for more
1579 information.
1580 check : `bool`, optional
1581 If `True` (default) check the query for consistency before
1582 executing it. This may reject some valid queries that resemble
1583 common mistakes (e.g. queries for visits without specifying an
1584 instrument).
1585 **kwargs
1586 Additional keyword arguments are forwarded to
1587 `DataCoordinate.standardize` when processing the ``dataId``
1588 argument (and may be used to provide a constraining data ID even
1589 when the ``dataId`` argument is `None`).
1591 Returns
1592 -------
1593 dataIds : `.queries.DimensionRecordQueryResults`
1594 Data IDs matching the given query parameters.
1596 Raises
1597 ------
1598 lsst.daf.butler.registry.NoDefaultCollectionError
1599 Raised if ``collections`` is `None` and
1600 ``self.defaults.collections`` is `None`.
1601 lsst.daf.butler.registry.CollectionExpressionError
1602 Raised when ``collections`` expression is invalid.
1603 lsst.daf.butler.registry.DataIdError
1604 Raised when ``dataId`` or keyword arguments specify unknown
1605 dimensions or values, or when they contain inconsistent values.
1606 lsst.daf.butler.registry.DatasetTypeExpressionError
1607 Raised when ``datasetType`` expression is invalid.
1608 lsst.daf.butler.registry.UserExpressionError
1609 Raised when ``where`` expression is invalid.
1610 """
1611 raise NotImplementedError()
1613 @abstractmethod
1614 def queryDatasetAssociations(
1615 self,
1616 datasetType: str | DatasetType,
1617 collections: CollectionArgType | None = ...,
1618 *,
1619 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1620 flattenChains: bool = False,
1621 ) -> Iterator[DatasetAssociation]:
1622 """Iterate over dataset-collection combinations where the dataset is in
1623 the collection.
1625 This method is a temporary placeholder for better support for
1626 association results in `queryDatasets`. It will probably be
1627 removed in the future, and should be avoided in production code
1628 whenever possible.
1630 Parameters
1631 ----------
1632 datasetType : `DatasetType` or `str`
1633 A dataset type object or the name of one.
1634 collections : collection expression, optional
1635 An expression that identifies the collections to search for
1636 datasets, such as a `str` (for full matches or partial matches
1637 via globs), `re.Pattern` (for partial matches), or iterable
1638 thereof. ``...`` can be used to search all collections (actually
1639 just all `~CollectionType.RUN` collections, because this will still
1640 find all datasets). If not provided, ``self.default.collections``
1641 is used. See :ref:`daf_butler_collection_expressions` for more
1642 information.
1643 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional
1644 If provided, only yield associations from collections of these
1645 types.
1646 flattenChains : `bool`, optional
1647 If `True` (default) search in the children of
1648 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
1649 collections are ignored.
1651 Yields
1652 ------
1653 association : `.DatasetAssociation`
1654 Object representing the relationship between a single dataset and
1655 a single collection.
1657 Raises
1658 ------
1659 lsst.daf.butler.registry.NoDefaultCollectionError
1660 Raised if ``collections`` is `None` and
1661 ``self.defaults.collections`` is `None`.
1662 lsst.daf.butler.registry.CollectionExpressionError
1663 Raised when ``collections`` expression is invalid.
1664 """
1665 raise NotImplementedError()
1667 @property
1668 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1669 """The ObsCore manager instance for this registry
1670 (`~.interfaces.ObsCoreTableManager`
1671 or `None`).
1673 ObsCore manager may not be implemented for all registry backend, or
1674 may not be enabled for many repositories.
1675 """
1676 return None
1678 storageClasses: StorageClassFactory
1679 """All storage classes known to the registry (`StorageClassFactory`).
1680 """
1682 datasetIdFactory: DatasetIdFactory
1683 """Factory for dataset IDs."""