# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "Registry",
)

from abc import ABC, abstractmethod
import contextlib
import logging
from typing import (
    Any,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    TYPE_CHECKING,
    Union,
)

from lsst.utils import doImport

from ..core import (
    ButlerURI,
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NameLookupMapping,
    StorageClassFactory,
    Timespan,
)
from . import queries
from ._config import RegistryConfig
from ._collectionType import CollectionType
from ._defaults import RegistryDefaults
from .wildcards import CollectionSearch
from .summaries import CollectionSummary

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from .interfaces import (
        CollectionRecord,
        DatastoreRegistryBridgeManager,
    )

_LOG = logging.getLogger(__name__)


class Registry(ABC):
    """Abstract Registry interface.

    Each registry implementation can have its own constructor parameters.
    The assumption is that an instance of a specific subclass will be
    constructed from configuration using `Registry.fromConfig()`.
    The base class will look for a ``cls`` entry and call that specific
    `fromConfig()` method.

    All subclasses should store `RegistryDefaults` in a ``_defaults``
    property. No other properties are assumed shared between implementations.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def forceRegistryConfig(cls, config: Optional[Union[ButlerConfig,
                                                        RegistryConfig, Config, str]]) -> RegistryConfig:
        """Force the supplied config to a `RegistryConfig`.

        Parameters
        ----------
        config : `RegistryConfig`, `Config`, `str`, or `None`
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        registry_config : `RegistryConfig`
            A registry config.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, (str, Config)) or config is None:
                config = RegistryConfig(config)
            else:
                raise ValueError(f"Incompatible Registry configuration: {config}")
        return config

    @classmethod
    def determineTrampoline(cls,
                            config: Optional[Union[ButlerConfig,
                                                   RegistryConfig,
                                                   Config,
                                                   str]]) -> Tuple[Type[Registry], RegistryConfig]:
        """Return the class to use to instantiate the real registry.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.

        Returns
        -------
        requested_cls : `type` of `Registry`
            The real registry class to use.
        registry_config : `RegistryConfig`
            The `RegistryConfig` to use.
        """
        config = cls.forceRegistryConfig(config)

        # Default to the standard registry.
        registry_cls = doImport(config.get("cls", "lsst.daf.butler.registry.SqlRegistry"))
        if registry_cls is cls:
            raise ValueError("Can not instantiate the abstract base Registry from config")
        return registry_cls, config

    @classmethod
    def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None,
                         dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
                         butlerRoot: Optional[str] = None) -> Registry:
        """Create a registry database and return a `Registry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : `str`, optional
            Path to the repository root this `Registry` will manage.

        Returns
        -------
        registry : `Registry`
            A new `Registry` instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method
        even if it can not create a registry.
        """
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)

    @classmethod
    def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str],
                   butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True,
                   defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `str` or `ButlerURI`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.

        Notes
        -----
        This class will determine the concrete `Registry` subclass to
        use from configuration. Each subclass should implement this method.
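
        Examples
        --------
        A minimal sketch, assuming a butler repository already exists; the
        path below is purely illustrative:

        >>> from lsst.daf.butler.registry import Registry
        >>> registry = Registry.fromConfig(
        ...     "/path/to/repo/butler.yaml",  # hypothetical repository path
        ...     writeable=False,
        ... )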
        """
        # The base class implementation should trampoline to the correct
        # subclass. No implementation should ever use this implementation
        # directly. If no class is specified, default to the standard
        # registry.
        registry_cls, registry_config = cls.determineTrampoline(config)
        return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and
        `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        """Create a new `Registry` backed by the same data repository and
        connection as this one, but with independent defaults.

        Parameters
        ----------
        defaults : `RegistryDefaults`, optional
            Default collections and data ID values for the new registry. If
            not provided, ``self.defaults`` will be used (but future changes
            to either registry's defaults will not affect the other).

        Returns
        -------
        copy : `Registry`
            A new `Registry` instance with its own defaults.

        Notes
        -----
        Because the new registry shares a connection with the original, they
        also share transaction state (despite the fact that their
        `transaction` context manager methods do not reflect this), and must
        be used with care.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """All dimensions recognized by this `Registry` (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    def defaults(self) -> RegistryDefaults:
        """Default collection search path and/or output `~CollectionType.RUN`
        collection (`RegistryDefaults`).

        This is an immutable struct whose components may not be set
        individually, but the entire struct can be set by assigning to this
        property.
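
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry`; the collection names are illustrative:

        >>> from lsst.daf.butler.registry import RegistryDefaults
        >>> # Replace the entire defaults struct; individual attributes
        >>> # cannot be assigned.
        >>> registry.defaults = RegistryDefaults(
        ...     collections=["HSC/defaults"],  # hypothetical collection
        ...     run="u/someone/run",           # hypothetical output run
        ... )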
        """
        return self._defaults

    @defaults.setter
    def defaults(self, value: RegistryDefaults) -> None:
        if value.run is not None:
            self.registerRun(value.run)
        value.finish(self)
        self._defaults = value

    @abstractmethod
    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        This may be necessary to enable querying for entities added by other
        registry instances after this one was constructed.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    @abstractmethod
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        """Return a context manager that represents a transaction.
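
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry`; the record below is illustrative:

        >>> with registry.transaction():
        ...     # Changes are committed only if the block exits cleanly;
        ...     # an exception rolls them back.
        ...     registry.insertDimensionData(
        ...         "instrument", {"name": "DummyCam"}  # hypothetical record
        ...     )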
        """
        raise NotImplementedError()

    def resetConnectionPool(self) -> None:
        """Reset the connection pool for the registry if relevant.

        This operation can be used to reset connections to servers when
        using the registry with fork-based multiprocessing. This method
        should usually be called by the child process immediately after the
        fork.

        The base class implementation is a no-op.
        """
        pass

    @abstractmethod
    def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED,
                           doc: Optional[str] = None) -> None:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.
        doc : `str`, optional
            Documentation string for the collection.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
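
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry`; the collection name is illustrative:

        >>> from lsst.daf.butler import CollectionType
        >>> registry.registerCollection(
        ...     "u/someone/tagged",  # hypothetical collection name
        ...     CollectionType.TAGGED,
        ...     doc="Hand-picked datasets for testing.",
        ... )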
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        raise NotImplementedError()

    @abstractmethod
    def _get_collection_record(self, name: str) -> CollectionRecord:
        """Return the record for this collection.

        Parameters
        ----------
        name : `str`
            Name of the collection for which the record is to be retrieved.

        Returns
        -------
        record : `CollectionRecord`
            The record for this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerRun(self, name: str, doc: Optional[str] = None) -> None:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.
        doc : `str`, optional
            Documentation string for the collection.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeCollection(self, name: str) -> None:
        """Completely remove the given collection.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it are also fully removed. This requires that those datasets be
        removed (or at least trashed) from any datastores that hold them
        first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more
            information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`; see
            :ref:`daf_butler_collection_expressions` for more information.
        flatten : `bool`, optional
            If `True` (`False` is default), recursively flatten out any
            nested `~CollectionType.CHAINED` collections in ``children``
            first.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
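
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry` and the child collections named below have already been
        registered (all names are illustrative):

        >>> from lsst.daf.butler import CollectionType
        >>> registry.registerCollection("u/someone/chain",
        ...                             CollectionType.CHAINED)
        >>> registry.setCollectionChain(
        ...     "u/someone/chain",
        ...     ["u/someone/run1", "u/someone/run2"],  # searched in order
        ... )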
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given name.
        """
        raise NotImplementedError()

    @abstractmethod
    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection.
        doc : `str` or `None`
            Docstring for the collection with the given name; will replace
            any existing docstring. Passing `None` will remove any existing
            docstring.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `str`
            Name of the collection for which a summary is to be retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasetType(self, name: str) -> None:
        """Remove the named `DatasetType` from the registry.

        .. warning::

            Registry implementations can cache the dataset type definitions.
            This means that deleting the dataset type definition may result
            in unexpected behavior from other butler processes that are
            active and have not seen the deletion.

        Parameters
        ----------
        name : `str`
            Name of the type to be removed.

        Raises
        ------
        lsst.daf.butler.registry.OrphanedRecordError
            Raised if an attempt is made to remove the dataset type
            definition when there are already datasets associated with it.

        Notes
        -----
        If the dataset type is not registered the method will return without
        action.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested `DatasetType` could not be found in the
            registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
                    collections: Any = None, timespan: Optional[Timespan] = None,
                    **kwargs: Any) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the parent
        will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for the dataset; see
            :ref:`daf_butler_collection_expressions` for more information.
            Defaults to ``self.defaults.collections``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            If not provided, any `~CollectionType.CALIBRATION` collections
            matched by the ``collections`` argument will not be searched.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        LookupError
            Raised if one or more data ID keys are missing.
        KeyError
            Raised if the dataset type does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.

        Notes
        -----
        This method simply returns `None` and does not raise an exception even
        when the set of collections searched is intrinsically incompatible
        with the dataset type, e.g. if ``datasetType.isCalibration() is
        False``, but only `~CollectionType.CALIBRATION` collections are being
        searched. This may make it harder to debug some lookup failures, but
        the behavior is intentional; we consider it more important that failed
        searches are reported consistently, regardless of the reason, and that
        adding additional collections that do not contain a match to the
        search path never changes the behavior.
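
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`;
        the dataset type, data ID values, and collection name are all
        illustrative:

        >>> ref = registry.findDataset(
        ...     "calexp",                     # hypothetical dataset type
        ...     instrument="HSC",             # data ID passed as kwargs
        ...     visit=903334,
        ...     detector=16,
        ...     collections="HSC/runs/demo",  # hypothetical collection
        ... )
        >>> if ref is None:
        ...     print("no matching dataset")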
        """
        raise NotImplementedError()

    @abstractmethod
    def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
                       run: Optional[str] = None, expand: bool = True) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`, optional
            The name of the run that produced the datasets. Defaults to
            ``self.defaults.run``.
        expand : `bool`, optional
            If `True` (default), expand data IDs as they are inserted. This
            is necessary in general to allow datastore to generate file
            templates, but it may be disabled if the caller can guarantee
            this is unnecessary.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        TypeError
            Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
        ConflictingDefinitionError
            Raised if a dataset with the same dataset type and data ID as one
            of those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
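
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry` in which the dataset type and run below have already been
        registered (all names are illustrative):

        >>> (ref,) = registry.insertDatasets(
        ...     "raw",  # hypothetical dataset type
        ...     dataIds=[{"instrument": "DummyCam", "exposure": 42,
        ...               "detector": 0}],
        ...     run="u/someone/ingest",  # hypothetical run collection
        ... )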
        """
        raise NotImplementedError()

    @abstractmethod
    def getDataset(self, id: int) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `int`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections,
        and any `Quantum` that consumed this dataset will instead be marked
        with having a NULL input. `Datastore` records will *not* be deleted;
        the caller is responsible for ensuring that the dataset has already
        been removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact integer ID is already in a
        collection nothing is changed. If a `DatasetRef` with the same
        `DatasetType` and data ID but with different integer ID
        exists in the collection, `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            Raised if a Dataset with the given `DatasetRef` already exists in
            the given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        TypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if one or more datasets are of a dataset type for
            which `DatasetType.isCalibration` returns `False`.
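
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry`, ``refs`` is an iterable of resolved calibration
        `DatasetRef` instances, and the collection name is illustrative.
        A `Timespan` with `None` bounds is unbounded on that side:

        >>> from lsst.daf.butler import Timespan
        >>> registry.certify(
        ...     "DummyCam/calib",  # hypothetical CALIBRATION collection
        ...     refs,
        ...     Timespan(begin=None, end=None),  # valid for all times
        ... )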
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *,
                  dataIds: Optional[Iterable[DataId]] = None) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `str`
            The name of an already-registered `~CollectionType.CALIBRATION`
            collection.
        datasetType : `str` or `DatasetType`
            Name or `DatasetType` instance for the datasets to be decertified.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataId` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``datasetType`` will be
            decertified.

        Raises
        ------
        TypeError
            Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
            collection or if ``datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and
            its associated datastores.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    @abstractmethod
    def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
                     records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
                     withDefaults: bool = True,
                     **kwargs: Any) -> DataCoordinate:
        """Expand a dimension-based data ID to include additional information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`str`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for
            that data, keyed by element name.
        withDefaults : `bool`, optional
            Utilize ``self.defaults.dataId`` to fill in missing governor
            dimension key-value pairs. Defaults to `True` (i.e. defaults are
            used).
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all of the dimensions
            it identifies, i.e. guarantees that ``expanded.hasRecords()`` and
            ``expanded.hasFull()`` both return `True`.
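
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`
        and the data ID values are illustrative:

        >>> expanded = registry.expandDataId(
        ...     instrument="HSC", exposure=903334, detector=16,
        ... )
        >>> expanded.hasFull() and expanded.hasRecords()
        True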
        """
        raise NotImplementedError()

    @abstractmethod
    def insertDimensionData(self, element: Union[DimensionElement, str],
                            *data: Union[Mapping[str, Any], DimensionRecord],
                            conform: bool = True) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``data`` is one or more `DimensionRecord` instances
            of the appropriate subclass.
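
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing writeable
        `Registry`; the record values are illustrative, and the fields
        required depend on the dimension configuration:

        >>> registry.insertDimensionData(
        ...     "instrument",
        ...     {"name": "DummyCam", "detector_max": 4},  # hypothetical
        ... )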
        """
        raise NotImplementedError()

    @abstractmethod
    def syncDimensionData(self, element: Union[DimensionElement, str],
                          row: Union[Mapping[str, Any], DimensionRecord],
                          conform: bool = True) -> bool:
        """Synchronize the given dimension record with the database, inserting
        if it does not already exist and comparing values if it does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or
            conversions, and assume that ``element`` is a `DimensionElement`
            instance and ``row`` is a `DimensionRecord` instance of the
            appropriate subclass.

        Returns
        -------
        inserted : `bool`
            `True` if a new row was inserted, `False` otherwise.

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
                          ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset
            types to return, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all dataset types, and is
            the default. See :ref:`daf_butler_dataset_type_expressions` for
            more information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.
        """
        raise NotImplementedError()

    @abstractmethod
    def queryCollections(self, expression: Any = ...,
                         datasetType: Optional[DatasetType] = None,
                         collectionTypes: Iterable[CollectionType] = CollectionType.all(),
                         flattenChains: bool = False,
                         includeChains: Optional[bool] = None) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that identifies the collections to return, such as
            a `str` (for full matches), `re.Pattern` (for partial matches),
            or iterable thereof. `...` can be used to return all collections,
            and is the default. See :ref:`daf_butler_collection_expressions`
            for more information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that may contain datasets of
            this type. This is a conservative approximation in general; it
            may yield collections that do not have any such datasets.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``:
            include either CHAINED collections or their children, but not
            both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.
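
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`;
        the pattern is illustrative:

        >>> import re
        >>> from lsst.daf.butler import CollectionType
        >>> for name in registry.queryCollections(
        ...     re.compile("u/someone/.*"),            # partial-match pattern
        ...     collectionTypes={CollectionType.RUN},  # only RUN collections
        ... ):
        ...     print(name)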
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasets(self, datasetType: Any, *,
                      collections: Any = None,
                      dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
                      dataId: Optional[DataId] = None,
                      where: Optional[str] = None,
                      findFirst: bool = False,
                      components: Optional[bool] = None,
                      bind: Optional[Mapping[str, Any]] = None,
                      check: bool = True,
                      **kwargs: Any) -> queries.DatasetQueryResults:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType
            An expression that fully or partially identifies the dataset
            types to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value `...` can
            be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search, such as
            a `str` (for full matches), `re.Pattern` (for partial matches),
            or iterable thereof. `...` can be used to search all collections
            (actually just all `~CollectionType.RUN` collections, because
            this will still find all datasets). If not provided,
            ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be `...`.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        refs : `queries.DatasetQueryResults`
            Dataset references matching the given query criteria. Nested data
            IDs are guaranteed to include values for all implied dimensions
            (i.e. `DataCoordinate.hasFull` will return `True`), but will not
            include dimension records (`DataCoordinate.hasRecords` will be
            `False`) unless `~queries.DatasetQueryResults.expanded` is called
            on the result object (which returns a new one).

        Raises
        ------
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``findFirst`` is `True`, or
            when ``collections`` is `None` and ``self.defaults.collections``
            is also `None`.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDataIds` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
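
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`;
        the dataset type, collection, and ``where`` values are illustrative:

        >>> refs = registry.queryDatasets(
        ...     "calexp",                     # hypothetical dataset type
        ...     collections="HSC/runs/demo",  # hypothetical collection
        ...     where="instrument = 'HSC' AND visit = 903334",
        ...     findFirst=True,
        ... )
        >>> for ref in refs:
        ...     print(ref.dataId)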
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
                     dataId: Optional[DataId] = None,
                     datasets: Any = None,
                     collections: Any = None,
                     where: Optional[str] = None,
                     components: Optional[bool] = None,
                     bind: Optional[Mapping[str, Any]] = None,
                     check: bool = True,
                     **kwargs: Any) -> queries.DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example,
            including "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to
            only those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, ``...`` is not permitted; it doesn't make sense to
            constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches), `re.Pattern` (for
            partial matches), or iterable thereof. `...` can be used to
            search all collections (actually just all `~CollectionType.RUN`
            collections, because this will still find all datasets). If not
            provided, ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        dataIds : `DataCoordinateQueryResults`
            Data IDs matching the given query parameters. These are
            guaranteed to identify all dimensions (`DataCoordinate.hasFull`
            returns `True`), but will not contain `DimensionRecord` objects
            (`DataCoordinate.hasRecords` returns `False`). Call
            `DataCoordinateQueryResults.expanded` on the returned object to
            fetch those (and consider using
            `DataCoordinateQueryResults.materialize` on the returned object
            first if the expected number of rows is very large). See
            documentation for those methods for additional information.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None`,
            ``self.defaults.collections`` is `None`, and ``datasets`` is not
            `None`.
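
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`;
        the dataset type and collection names are illustrative:

        >>> dataIds = registry.queryDataIds(
        ...     ["visit", "detector"],
        ...     datasets="raw",             # hypothetical dataset type
        ...     collections="HSC/raw/all",  # hypothetical collection
        ...     where="instrument = 'HSC' AND visit = 903334",
        ... )
        >>> for dataId in dataIds:
        ...     print(dataId["detector"])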
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDimensionRecords(self, element: Union[DimensionElement, str], *,
                              dataId: Optional[DataId] = None,
                              datasets: Any = None,
                              collections: Any = None,
                              where: Optional[str] = None,
                              components: Optional[bool] = None,
                              bind: Optional[Mapping[str, Any]] = None,
                              check: bool = True,
                              **kwargs: Any) -> Iterator[DimensionRecord]:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The dimension element to obtain records for.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded records. See `queryDataIds` and
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches), `re.Pattern` (for
            partial matches), or iterable thereof. `...` can be used to
            search all collections (actually just all `~CollectionType.RUN`
            collections, because this will still find all datasets). If not
            provided, ``self.defaults.collections`` is used. Ignored unless
            ``datasets`` is also passed. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. See
            `queryDataIds` and :ref:`daf_butler_dimension_expressions` for
            more information.
        components : `bool`, optional
            Whether to apply dataset expressions to components as well.
            See `queryDataIds` for more information.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``where`` expression, keyed by the identifiers they replace.
        check : `bool`, optional
            If `True` (default) check the query for consistency before
            executing it. This may reject some valid queries that resemble
            common mistakes (e.g. queries for visits without specifying an
            instrument).
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Returns
        -------
        records : `Iterator` [ `DimensionRecord` ]
            Dimension records matching the given query parameters.
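
        Examples
        --------
        A minimal sketch, assuming ``registry`` is an existing `Registry`;
        the instrument name and record fields are illustrative:

        >>> for record in registry.queryDimensionRecords(
        ...     "detector",
        ...     where="instrument = 'DummyCam'",  # hypothetical instrument
        ... ):
        ...     print(record.id, record.full_name)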
        """
        raise NotImplementedError()

    @abstractmethod
    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        """Iterate over dataset-collection combinations where the dataset is
        in the collection.

        This method is a temporary placeholder for better support for
        association results in `queryDatasets`. It will probably be
        removed in the future, and should be avoided in production code
        whenever possible.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A dataset type object or the name of one.
        collections : `Any`, optional
            An expression that identifies the collections to search for
            datasets, such as a `str` (for full matches), `re.Pattern` (for
            partial matches), or iterable thereof. `...` can be used to
            search all collections (actually just all `~CollectionType.RUN`
            collections, because this will still find all datasets). If not
            provided, ``self.defaults.collections`` is used. See
            :ref:`daf_butler_collection_expressions` for more information.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield associations from collections of these
            types.
        flattenChains : `bool`, optional
            If `True` (`False` is default), search in the children of
            `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
            collections are ignored.

        Yields
        ------
        association : `DatasetAssociation`
            Object representing the relationship between a single dataset and
            a single collection.

        Raises
        ------
        TypeError
            Raised if ``collections`` is `None` and
            ``self.defaults.collections`` is `None`.
        """
        raise NotImplementedError()

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """