Coverage for python/lsst/daf/butler/registry/_registry.py : 62%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "Registry",
26)
28from abc import ABC, abstractmethod
29import contextlib
30import logging
31from typing import (
32 Any,
33 Iterable,
34 Iterator,
35 List,
36 Mapping,
37 Optional,
38 Tuple,
39 Type,
40 TYPE_CHECKING,
41 Union,
42)
44from lsst.utils import doImport
46from ..core import (
47 ButlerURI,
48 Config,
49 DataCoordinate,
50 DataId,
51 DatasetAssociation,
52 DatasetId,
53 DatasetRef,
54 DatasetType,
55 Dimension,
56 DimensionConfig,
57 DimensionElement,
58 DimensionGraph,
59 DimensionRecord,
60 DimensionUniverse,
61 NameLookupMapping,
62 StorageClassFactory,
63 Timespan,
64)
65from . import queries
66from ._config import RegistryConfig
67from ._collectionType import CollectionType
68from ._defaults import RegistryDefaults
69from .interfaces import DatasetIdGenEnum
70from .wildcards import CollectionSearch
71from .summaries import CollectionSummary
73if TYPE_CHECKING:
74 from .._butlerConfig import ButlerConfig
75 from .interfaces import (
76 CollectionRecord,
77 DatastoreRegistryBridgeManager,
78 )
80_LOG = logging.getLogger(__name__)
83class Registry(ABC):
84 """Abstract Registry interface.
86 Each registry implementation can have its own constructor parameters.
87 The assumption is that an instance of a specific subclass will be
88 constructed from configuration using `Registry.fromConfig()`.
89 The base class will look for a ``cls`` entry and call that specific
90 `fromConfig()` method.
92 All subclasses should store `RegistryDefaults` in a ``_defaults``
93 property. No other properties are assumed to be shared between implementations.
94 """
96 defaultConfigFile: Optional[str] = None
97 """Path to configuration defaults. Accessed within the ``configs`` resource
98 or relative to a search path. Can be `None` if no defaults are specified.
99 """
101 @classmethod
102 def forceRegistryConfig(cls, config: Optional[Union[ButlerConfig,
103 RegistryConfig, Config, str]]) -> RegistryConfig:
104 """Force the supplied config to a `RegistryConfig`.
106 Parameters
107 ----------
108 config : `ButlerConfig`, `RegistryConfig`, `Config`, `str`, or `None`
109 Registry configuration. If missing, the default configuration will
110 be loaded from ``registry.yaml``.
112 Returns
113 -------
114 registry_config : `RegistryConfig`
115 A registry config.
116 """
117 if not isinstance(config, RegistryConfig):
118 if isinstance(config, (str, Config)) or config is None:
119 config = RegistryConfig(config)
120 else:
121 raise ValueError(f"Incompatible Registry configuration: {config}")
122 return config
124 @classmethod
125 def determineTrampoline(cls,
126 config: Optional[Union[ButlerConfig,
127 RegistryConfig,
128 Config,
129 str]]) -> Tuple[Type[Registry], RegistryConfig]:
130 """Return class to use to instantiate real registry.
132 Parameters
133 ----------
134 config : `RegistryConfig` or `str`, optional
135 Registry configuration. If missing, the default configuration will
136 be loaded from ``registry.yaml``.
138 Returns
139 -------
140 requested_cls : `type` of `Registry`
141 The real registry class to use.
142 registry_config : `RegistryConfig`
143 The `RegistryConfig` to use.
144 """
145 config = cls.forceRegistryConfig(config)
147 # Default to the standard registry
148 registry_cls = doImport(config.get("cls", "lsst.daf.butler.registry.SqlRegistry"))
149 if registry_cls is cls:
150 raise ValueError("Can not instantiate the abstract base Registry from config")
151 return registry_cls, config
153 @classmethod
154 def createFromConfig(cls, config: Optional[Union[RegistryConfig, str]] = None,
155 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
156 butlerRoot: Optional[str] = None) -> Registry:
157 """Create registry database and return `Registry` instance.
159 This method initializes database contents; the database must be empty
160 prior to calling this method.
162 Parameters
163 ----------
164 config : `RegistryConfig` or `str`, optional
165 Registry configuration. If missing, the default configuration will
166 be loaded from ``registry.yaml``.
167 dimensionConfig : `DimensionConfig` or `str`, optional
168 Dimensions configuration. If missing, the default configuration
169 will be loaded from ``dimensions.yaml``.
170 butlerRoot : `str`, optional
171 Path to the repository root this `Registry` will manage.
173 Returns
174 -------
175 registry : `Registry`
176 A new `Registry` instance.
178 Notes
179 -----
180 This class will determine the concrete `Registry` subclass to
181 use from configuration. Each subclass should implement this method
182 even if it can not create a registry.
183 """
184 registry_cls, registry_config = cls.determineTrampoline(config)
185 return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)
187 @classmethod
188 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str],
189 butlerRoot: Optional[Union[str, ButlerURI]] = None, writeable: bool = True,
190 defaults: Optional[RegistryDefaults] = None) -> Registry:
191 """Create `Registry` subclass instance from `config`.
193 Registry database must be initialized prior to calling this method.
195 Parameters
196 ----------
197 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
198 Registry configuration
199 butlerRoot : `str` or `ButlerURI`, optional
200 Path to the repository root this `Registry` will manage.
201 writeable : `bool`, optional
202 If `True` (default) create a read-write connection to the database.
203 defaults : `RegistryDefaults`, optional
204 Default collection search path and/or output `~CollectionType.RUN`
205 collection.
207 Returns
208 -------
209 registry : `Registry` (subclass)
210 A new `Registry` subclass instance.
212 Notes
213 -----
214 This class will determine the concrete `Registry` subclass to
215 use from configuration. Each subclass should implement this method.
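Examples
--------
An illustrative sketch (the configuration path here is hypothetical):
>>> from lsst.daf.butler.registry import Registry
>>> registry = Registry.fromConfig("/repo/butler.yaml",  # hypothetical path
...                                writeable=False)
>>> registry.isWriteable()
False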
216 """
217 # The base class implementation should trampoline to the correct
218 # subclass. No implementation should ever use this implementation
219 # directly. If no class is specified, default to the standard
220 # registry.
221 registry_cls, registry_config = cls.determineTrampoline(config)
222 return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
224 @abstractmethod
225 def isWriteable(self) -> bool:
226 """Return `True` if this registry allows write operations, and `False`
227 otherwise.
228 """
229 raise NotImplementedError()
231 @abstractmethod
232 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
233 """Create a new `Registry` backed by the same data repository and
234 connection as this one, but independent defaults.
236 Parameters
237 ----------
238 defaults : `RegistryDefaults`, optional
239 Default collections and data ID values for the new registry. If
240 not provided, ``self.defaults`` will be used (but future changes
241 to either registry's defaults will not affect the other).
243 Returns
244 -------
245 copy : `Registry`
246 A new `Registry` instance with its own defaults.
248 Notes
249 -----
250 Because the new registry shares a connection with the original, they
251 also share transaction state (despite the fact that their `transaction`
252 context manager methods do not reflect this), and must be used with
253 care.
254 """
255 raise NotImplementedError()
257 @property
258 @abstractmethod
259 def dimensions(self) -> DimensionUniverse:
260 """All dimensions recognized by this `Registry` (`DimensionUniverse`).
261 """
262 raise NotImplementedError()
264 @property
265 def defaults(self) -> RegistryDefaults:
266 """Default collection search path and/or output `~CollectionType.RUN`
267 collection (`RegistryDefaults`).
269 This is an immutable struct whose components may not be set
270 individually, but the entire struct can be set by assigning to this
271 property.
272 """
273 return self._defaults
275 @defaults.setter
276 def defaults(self, value: RegistryDefaults) -> None:
277 if value.run is not None:
278 self.registerRun(value.run)
279 value.finish(self)
280 self._defaults = value
282 @abstractmethod
283 def refresh(self) -> None:
284 """Refresh all in-memory state by querying the database.
286 This may be necessary to enable querying for entities added by other
287 registry instances after this one was constructed.
288 """
289 raise NotImplementedError()
291 @contextlib.contextmanager
292 @abstractmethod
293 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
294 """Return a context manager that represents a transaction.
295 """
296 raise NotImplementedError()
298 def resetConnectionPool(self) -> None:
299 """Reset connection pool for registry if relevant.
301 This operation can be used to reset connections to servers when
302 using the registry with fork-based multiprocessing. This method should
303 usually be called by the child process immediately
304 after the fork.
306 The base class implementation is a no-op.
307 """
308 pass
310 @abstractmethod
311 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED,
312 doc: Optional[str] = None) -> None:
313 """Add a new collection if one with the given name does not exist.
315 Parameters
316 ----------
317 name : `str`
318 The name of the collection to create.
319 type : `CollectionType`
320 Enum value indicating the type of collection to create.
321 doc : `str`, optional
322 Documentation string for the collection.
324 Notes
325 -----
326 This method cannot be called within transactions, as it needs to be
327 able to perform its own transaction to be concurrent.
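Examples
--------
An illustrative sketch, assuming ``registry`` is a `Registry` instance
(the collection name is hypothetical):
>>> from lsst.daf.butler import CollectionType
>>> registry.registerCollection("u/example/tagged-subset",  # hypothetical
...                             CollectionType.TAGGED,
...                             doc="Hand-picked datasets for testing.")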
328 """
329 raise NotImplementedError()
331 @abstractmethod
332 def getCollectionType(self, name: str) -> CollectionType:
333 """Return an enumeration value indicating the type of the given
334 collection.
336 Parameters
337 ----------
338 name : `str`
339 The name of the collection.
341 Returns
342 -------
343 type : `CollectionType`
344 Enum value indicating the type of this collection.
346 Raises
347 ------
348 MissingCollectionError
349 Raised if no collection with the given name exists.
350 """
351 raise NotImplementedError()
353 @abstractmethod
354 def _get_collection_record(self, name: str) -> CollectionRecord:
355 """Return the record for this collection.
357 Parameters
358 ----------
359 name : `str`
360 Name of the collection for which the record is to be retrieved.
362 Returns
363 -------
364 record : `CollectionRecord`
365 The record for this collection.
366 """
367 raise NotImplementedError()
369 @abstractmethod
370 def registerRun(self, name: str, doc: Optional[str] = None) -> None:
371 """Add a new run if one with the given name does not exist.
373 Parameters
374 ----------
375 name : `str`
376 The name of the run to create.
377 doc : `str`, optional
378 Documentation string for the collection.
380 Notes
381 -----
382 This method cannot be called within transactions, as it needs to be
383 able to perform its own transaction to be concurrent.
384 """
385 raise NotImplementedError()
387 @abstractmethod
388 def removeCollection(self, name: str) -> None:
389 """Completely remove the given collection.
391 Parameters
392 ----------
393 name : `str`
394 The name of the collection to remove.
396 Raises
397 ------
398 MissingCollectionError
399 Raised if no collection with the given name exists.
401 Notes
402 -----
403 If this is a `~CollectionType.RUN` collection, all datasets and quanta
404 in it are also fully removed. This requires that those datasets be
405 removed (or at least trashed) from any datastores that hold them first.
407 A collection may not be deleted as long as it is referenced by a
408 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
409 be deleted or redefined first.
410 """
411 raise NotImplementedError()
413 @abstractmethod
414 def getCollectionChain(self, parent: str) -> CollectionSearch:
415 """Return the child collections in a `~CollectionType.CHAINED`
416 collection.
418 Parameters
419 ----------
420 parent : `str`
421 Name of the chained collection. Must have already been added via
422 a call to `Registry.registerCollection`.
424 Returns
425 -------
426 children : `CollectionSearch`
427 An object that defines the search path of the collection.
428 See :ref:`daf_butler_collection_expressions` for more information.
430 Raises
431 ------
432 MissingCollectionError
433 Raised if ``parent`` does not exist in the `Registry`.
434 TypeError
435 Raised if ``parent`` does not correspond to a
436 `~CollectionType.CHAINED` collection.
437 """
438 raise NotImplementedError()
440 @abstractmethod
441 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
442 """Define or redefine a `~CollectionType.CHAINED` collection.
444 Parameters
445 ----------
446 parent : `str`
447 Name of the chained collection. Must have already been added via
448 a call to `Registry.registerCollection`.
449 children : `Any`
450 An expression defining an ordered search of child collections,
451 generally an iterable of `str`; see
452 :ref:`daf_butler_collection_expressions` for more information.
453 flatten : `bool`, optional
454 If `True` (`False` is default), recursively flatten out any nested
455 `~CollectionType.CHAINED` collections in ``children`` first.
457 Raises
458 ------
459 MissingCollectionError
460 Raised when any of the given collections do not exist in the
461 `Registry`.
462 TypeError
463 Raised if ``parent`` does not correspond to a
464 `~CollectionType.CHAINED` collection.
465 ValueError
466 Raised if the given collections contain a cycle.
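Examples
--------
An illustrative sketch using hypothetical collection names:
>>> from lsst.daf.butler import CollectionType
>>> registry.registerCollection("HypoCam/defaults", CollectionType.CHAINED)
>>> registry.setCollectionChain("HypoCam/defaults",
...                             ["u/example/run1", "u/example/run2"])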
467 """
468 raise NotImplementedError()
470 @abstractmethod
471 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
472 """Retrieve the documentation string for a collection.
474 Parameters
475 ----------
476 collection : `str`
477 Name of the collection.
479 Returns
480 -------
481 docs : `str` or `None`
482 Docstring for the collection with the given name.
483 """
484 raise NotImplementedError()
486 @abstractmethod
487 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
488 """Set the documentation string for a collection.
490 Parameters
491 ----------
492 collection : `str`
493 Name of the collection.
494 doc : `str` or `None`
495 Docstring for the collection with the given name; will replace any
496 existing docstring. Passing `None` will remove any existing
497 docstring.
498 """
499 raise NotImplementedError()
501 @abstractmethod
502 def getCollectionSummary(self, collection: str) -> CollectionSummary:
503 """Return a summary for the given collection.
505 Parameters
506 ----------
507 collection : `str`
508 Name of the collection for which a summary is to be retrieved.
510 Returns
511 -------
512 summary : `CollectionSummary`
513 Summary of the dataset types and governor dimension values in
514 this collection.
515 """
516 raise NotImplementedError()
518 @abstractmethod
519 def registerDatasetType(self, datasetType: DatasetType) -> bool:
520 """
521 Add a new `DatasetType` to the Registry.
523 It is not an error to register the same `DatasetType` twice.
525 Parameters
526 ----------
527 datasetType : `DatasetType`
528 The `DatasetType` to be added.
530 Returns
531 -------
532 inserted : `bool`
533 `True` if ``datasetType`` was inserted, `False` if an identical
534 existing `DatasetType` was found. Note that in either case the
535 DatasetType is guaranteed to be defined in the Registry
536 consistently with the given definition.
538 Raises
539 ------
540 ValueError
541 Raised if the dimensions or storage class are invalid.
542 ConflictingDefinitionError
543 Raised if this DatasetType is already registered with a different
544 definition.
546 Notes
547 -----
548 This method cannot be called within transactions, as it needs to be
549 able to perform its own transaction to be concurrent.
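Examples
--------
An illustrative sketch; the dataset type name is hypothetical:
>>> from lsst.daf.butler import DatasetType
>>> datasetType = DatasetType("calexp_example",  # hypothetical name
...                           dimensions=("instrument", "visit", "detector"),
...                           storageClass="ExposureF",
...                           universe=registry.dimensions)
>>> registry.registerDatasetType(datasetType)
True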
550 """
551 raise NotImplementedError()
553 @abstractmethod
554 def removeDatasetType(self, name: str) -> None:
555 """Remove the named `DatasetType` from the registry.
557 .. warning::
559 Registry implementations can cache the dataset type definitions.
560 This means that deleting the dataset type definition may result in
561 unexpected behavior from other active butler processes that have
562 not yet seen the deletion.
564 Parameters
565 ----------
566 name : `str`
567 Name of the type to be removed.
569 Raises
570 ------
571 lsst.daf.butler.registry.OrphanedRecordError
572 Raised if an attempt is made to remove the dataset type definition
573 when there are already datasets associated with it.
575 Notes
576 -----
577 If the dataset type is not registered, the method will return without
578 action.
579 """
580 raise NotImplementedError()
582 @abstractmethod
583 def getDatasetType(self, name: str) -> DatasetType:
584 """Get the `DatasetType`.
586 Parameters
587 ----------
588 name : `str`
589 Name of the type.
591 Returns
592 -------
593 type : `DatasetType`
594 The `DatasetType` associated with the given name.
596 Raises
597 ------
598 KeyError
599 Raised if the named `DatasetType` could not be found in the registry.
600 """
601 raise NotImplementedError()
603 @abstractmethod
604 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
605 collections: Any = None, timespan: Optional[Timespan] = None,
606 **kwargs: Any) -> Optional[DatasetRef]:
607 """Find a dataset given its `DatasetType` and data ID.
609 This can be used to obtain a `DatasetRef` that permits the dataset to
610 be read from a `Datastore`. If the dataset is a component and can not
611 be found using the provided dataset type, a dataset ref for the parent
612 will be returned instead but with the correct dataset type.
614 Parameters
615 ----------
616 datasetType : `DatasetType` or `str`
617 A `DatasetType` or the name of one.
618 dataId : `dict` or `DataCoordinate`, optional
619 A `dict`-like object containing the `Dimension` links that identify
620 the dataset within a collection.
621 collections : `Any`, optional
622 An expression that fully or partially identifies the collections to
623 search for the dataset; see
624 :ref:`daf_butler_collection_expressions` for more information.
625 Defaults to ``self.defaults.collections``.
626 timespan : `Timespan`, optional
627 A timespan that the validity range of the dataset must overlap.
628 If not provided, any `~CollectionType.CALIBRATION` collections
629 matched by the ``collections`` argument will not be searched.
630 **kwargs
631 Additional keyword arguments passed to
632 `DataCoordinate.standardize` to convert ``dataId`` to a true
633 `DataCoordinate` or augment an existing one.
635 Returns
636 -------
637 ref : `DatasetRef` or `None`
638 A reference to the dataset, or `None` if no matching Dataset
639 was found.
641 Raises
642 ------
643 TypeError
644 Raised if ``collections`` is `None` and
645 ``self.defaults.collections`` is `None`.
646 LookupError
647 Raised if one or more data ID keys are missing.
648 KeyError
649 Raised if the dataset type does not exist.
650 MissingCollectionError
651 Raised if any of ``collections`` does not exist in the registry.
653 Notes
654 -----
655 This method simply returns `None` and does not raise an exception even
656 when the set of collections searched is intrinsically incompatible with
657 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
658 only `~CollectionType.CALIBRATION` collections are being searched.
659 This may make it harder to debug some lookup failures, but the behavior
660 is intentional; we consider it more important that failed searches are
661 reported consistently, regardless of the reason, and that adding
662 additional collections that do not contain a match to the search path
663 never changes the behavior.
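Examples
--------
An illustrative sketch; the data ID values and collection name are
hypothetical:
>>> ref = registry.findDataset("calexp",
...                            instrument="HypoCam", visit=903334,
...                            detector=20,
...                            collections="u/example/run1")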
664 """
665 raise NotImplementedError()
667 @abstractmethod
668 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
669 run: Optional[str] = None, expand: bool = True,
670 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE) -> List[DatasetRef]:
671 """Insert one or more datasets into the `Registry`
673 This always adds new datasets; to associate existing datasets with
674 a new collection, use ``associate``.
676 Parameters
677 ----------
678 datasetType : `DatasetType` or `str`
679 A `DatasetType` or the name of one.
680 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
681 Dimension-based identifiers for the new datasets.
682 run : `str`, optional
683 The name of the run that produced the datasets. Defaults to
684 ``self.defaults.run``.
685 expand : `bool`, optional
686 If `True` (default), expand data IDs as they are inserted. This is
687 necessary in general to allow datastore to generate file templates,
688 but it may be disabled if the caller can guarantee this is
689 unnecessary.
690 idGenerationMode : `DatasetIdGenEnum`, optional
691 Specifies option for generating dataset IDs. By default unique IDs
692 are generated for each inserted dataset.
694 Returns
695 -------
696 refs : `list` of `DatasetRef`
697 Resolved `DatasetRef` instances for all given data IDs (in the same
698 order).
700 Raises
701 ------
702 TypeError
703 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
704 ConflictingDefinitionError
705 If a dataset with the same dataset type and data ID as one of those
706 given already exists in ``run``.
707 MissingCollectionError
708 Raised if ``run`` does not exist in the registry.
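Examples
--------
An illustrative sketch; the dataset type, data ID, and run name are
hypothetical:
>>> (ref,) = registry.insertDatasets("raw",
...                                  [{"instrument": "HypoCam",
...                                    "exposure": 101, "detector": 5}],
...                                  run="HypoCam/raw/all")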
709 """
710 raise NotImplementedError()
712 @abstractmethod
713 def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True,
714 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
715 reuseIds: bool = False) -> List[DatasetRef]:
716 """Import one or more datasets into the `Registry`.
718 Unlike `insertDatasets`, this method accepts `DatasetRef` instances
719 which should already be resolved and have a dataset ID. If the
720 registry supports globally-unique dataset IDs (e.g. `uuid.UUID`),
721 datasets which already exist in the registry will be ignored if
722 imported again.
724 Parameters
725 ----------
726 datasets : `~collections.abc.Iterable` of `DatasetRef`
727 Datasets to be inserted. All `DatasetRef` instances must have
728 identical ``datasetType`` and ``run`` attributes. ``run``
729 attribute can be `None` and defaults to ``self.defaults.run``.
730 Datasets can specify the ``id`` attribute, which will be used for
731 inserted datasets. All dataset IDs must have the same type
732 (`int` or `uuid.UUID`); if the type of the dataset IDs does not match
733 the configured backend, the IDs will be ignored and new IDs will be
734 generated by the backend.
735 expand : `bool`, optional
736 If `True` (default), expand data IDs as they are inserted. This is
737 necessary in general to allow datastore to generate file templates,
738 but it may be disabled if the caller can guarantee this is
739 unnecessary.
740 idGenerationMode : `DatasetIdGenEnum`, optional
741 Specifies option for generating dataset IDs when IDs are not
742 provided or their type does not match backend type. By default
743 unique IDs are generated for each inserted dataset.
744 reuseIds : `bool`, optional
745 If `True`, force re-use of imported dataset IDs for integer
746 IDs, which are normally generated as auto-incremented; an exception
747 will be raised if imported IDs clash with existing ones. This
748 option has no effect on the use of globally-unique IDs which are
749 always re-used (or generated if integer IDs are being imported).
751 Returns
752 -------
753 refs : `list` of `DatasetRef`
754 Resolved `DatasetRef` instances for all given data IDs (in the same
755 order). If any of ``datasets`` has an ID which already exists in
756 the database then it will not be inserted or updated, but a
757 resolved `DatasetRef` will be returned for it in any case.
759 Raises
760 ------
761 TypeError
762 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
763 ConflictingDefinitionError
764 If a dataset with the same dataset type and data ID as one of those
765 given already exists in ``run``.
766 MissingCollectionError
767 Raised if ``run`` does not exist in the registry.
769 Notes
770 -----
771 This method is considered package-private and internal to Butler
772 implementation. Clients outside daf_butler package should not use this
773 method.
774 """
775 raise NotImplementedError()
777 @abstractmethod
778 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
779 """Retrieve a Dataset entry.
781 Parameters
782 ----------
783 id : `DatasetId`
784 The unique identifier for the dataset.
786 Returns
787 -------
788 ref : `DatasetRef` or `None`
789 A ref to the Dataset, or `None` if no matching Dataset
790 was found.
791 """
792 raise NotImplementedError()
794 @abstractmethod
795 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
796 """Remove datasets from the Registry.
798 The datasets will be removed unconditionally from all collections, and
799 any `Quantum` that consumed this dataset will instead be marked as
800 having a NULL input. `Datastore` records will *not* be deleted; the
801 caller is responsible for ensuring that the dataset has already been
802 removed from all Datastores.
804 Parameters
805 ----------
806 refs : `Iterable` of `DatasetRef`
807 References to the datasets to be removed. Must include a valid
808 ``id`` attribute, and should be considered invalidated upon return.
810 Raises
811 ------
812 AmbiguousDatasetError
813 Raised if any ``ref.id`` is `None`.
814 OrphanedRecordError
815 Raised if any dataset is still present in any `Datastore`.
816 """
817 raise NotImplementedError()
819 @abstractmethod
820 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
821 """Add existing datasets to a `~CollectionType.TAGGED` collection.
823 If a `DatasetRef` with the exact same ID is already in a collection,
824 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
825 data ID but with different ID exists in the collection,
826 `ConflictingDefinitionError` is raised.
828 Parameters
829 ----------
830 collection : `str`
831 Indicates the collection the datasets should be associated with.
832 refs : `Iterable` [ `DatasetRef` ]
833 An iterable of resolved `DatasetRef` instances that already exist
834 in this `Registry`.
836 Raises
837 ------
838 ConflictingDefinitionError
839 If a Dataset with the given `DatasetRef` already exists in the
840 given collection.
841 AmbiguousDatasetError
842 Raised if ``any(ref.id is None for ref in refs)``.
843 MissingCollectionError
844 Raised if ``collection`` does not exist in the registry.
845 TypeError
846 Raised if adding new datasets to the given ``collection`` is not
847 allowed.
848 """
849 raise NotImplementedError()
851 @abstractmethod
852 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
853 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
855 ``collection`` and ``ref`` combinations that are not currently
856 associated are silently ignored.
858 Parameters
859 ----------
860 collection : `str`
861 The collection the datasets should no longer be associated with.
862 refs : `Iterable` [ `DatasetRef` ]
863 An iterable of resolved `DatasetRef` instances that already exist
864 in this `Registry`.
866 Raises
867 ------
868 AmbiguousDatasetError
869 Raised if any of the given dataset references is unresolved.
870 MissingCollectionError
871 Raised if ``collection`` does not exist in the registry.
872 TypeError
873 Raised if removing datasets from the given ``collection`` is not
874 allowed.
875 """
876 raise NotImplementedError()
878 @abstractmethod
879 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
880 """Associate one or more datasets with a calibration collection and a
881 validity range within it.
883 Parameters
884 ----------
885 collection : `str`
886 The name of an already-registered `~CollectionType.CALIBRATION`
887 collection.
888 refs : `Iterable` [ `DatasetRef` ]
889 Datasets to be associated.
890 timespan : `Timespan`
891 The validity range for these datasets within the collection.
893 Raises
894 ------
895 AmbiguousDatasetError
896 Raised if any of the given `DatasetRef` instances is unresolved.
897 ConflictingDefinitionError
898 Raised if the collection already contains a different dataset with
899 the same `DatasetType` and data ID and an overlapping validity
900 range.
901 TypeError
902 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
903 collection or if one or more datasets are of a dataset type for
904 which `DatasetType.isCalibration` returns `False`.
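Examples
--------
An illustrative sketch, assuming ``refs`` holds resolved `DatasetRef`
instances; the collection name and dates are hypothetical:
>>> from astropy.time import Time
>>> from lsst.daf.butler import Timespan
>>> timespan = Timespan(Time("2021-01-01", scale="tai"),
...                     Time("2021-07-01", scale="tai"))
>>> registry.certify("HypoCam/calib", refs, timespan)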
905 """
906 raise NotImplementedError()
908 @abstractmethod
909 def decertify(self, collection: str, datasetType: Union[str, DatasetType], timespan: Timespan, *,
910 dataIds: Optional[Iterable[DataId]] = None) -> None:
911 """Remove or adjust datasets to clear a validity range within a
912 calibration collection.
914 Parameters
915 ----------
916 collection : `str`
917 The name of an already-registered `~CollectionType.CALIBRATION`
918 collection.
919 datasetType : `str` or `DatasetType`
920 Name or `DatasetType` instance for the datasets to be decertified.
921 timespan : `Timespan`
922 The validity range to remove datasets from within the collection.
923 Datasets that overlap this range but are not contained by it will
924 have their validity ranges adjusted to not overlap it, which may
925 split a single dataset validity range into two.
926 dataIds : `Iterable` [ `DataId` ], optional
927 Data IDs that should be decertified within the given validity range.
928 If `None`, all data IDs for ``datasetType`` will be
929 decertified.
931 Raises
932 ------
933 TypeError
934 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
935 collection or if ``datasetType.isCalibration() is False``.
936 """
937 raise NotImplementedError()
939 @abstractmethod
940 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
941 """Return an object that allows a new `Datastore` instance to
942 communicate with this `Registry`.
944 Returns
945 -------
946 manager : `DatastoreRegistryBridgeManager`
947 Object that mediates communication between this `Registry` and its
948 associated datastores.
949 """
950 raise NotImplementedError()
952 @abstractmethod
953 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
954 """Retrieve datastore locations for a given dataset.
956 Parameters
957 ----------
958 ref : `DatasetRef`
959 A reference to the dataset for which to retrieve storage
960 information.
962 Returns
963 -------
964 datastores : `Iterable` [ `str` ]
965 All the matching datastores holding this dataset.
967 Raises
968 ------
969 AmbiguousDatasetError
970 Raised if ``ref.id`` is `None`.
971 """
972 raise NotImplementedError()
974 @abstractmethod
975 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
976 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
977 withDefaults: bool = True,
978 **kwargs: Any) -> DataCoordinate:
979 """Expand a dimension-based data ID to include additional information.
981 Parameters
982 ----------
983 dataId : `DataCoordinate` or `dict`, optional
984 Data ID to be expanded; augmented and overridden by ``kwargs``.
985 graph : `DimensionGraph`, optional
986 Set of dimensions for the expanded ID. If `None`, the dimensions
987 will be inferred from the keys of ``dataId`` and ``kwargs``.
988 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph``
989 are silently ignored, providing a way to extract and expand a
990 subset of a data ID.
991 records : `Mapping` [`str`, `DimensionRecord`], optional
992 Dimension record data to use before querying the database for that
993 data, keyed by element name.
994 withDefaults : `bool`, optional
995 Utilize ``self.defaults.dataId`` to fill in missing governor
996 dimension key-value pairs. Defaults to `True` (i.e. defaults are
997 used).
998 **kwargs
999 Additional keywords are treated like additional key-value pairs for
1000 ``dataId``, extending and overriding it.
1002 Returns
1003 -------
1004 expanded : `DataCoordinate`
1005 A data ID that includes full metadata for all of the dimensions it
1006 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1007 ``expanded.hasFull()`` both return `True`.
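Examples
--------
An illustrative sketch with hypothetical data ID values:
>>> dataId = registry.expandDataId(instrument="HypoCam", exposure=101,
...                                detector=5)
>>> dataId.hasRecords() and dataId.hasFull()
True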
1008 """
1009 raise NotImplementedError()
1011 @abstractmethod
1012 def insertDimensionData(self, element: Union[DimensionElement, str],
1013 *data: Union[Mapping[str, Any], DimensionRecord],
1014 conform: bool = True) -> None:
1015 """Insert one or more dimension records into the database.
1017 Parameters
1018 ----------
1019 element : `DimensionElement` or `str`
1020 The `DimensionElement` or name thereof that identifies the table
1021 records will be inserted into.
1022 data : `dict` or `DimensionRecord` (variadic)
1023 One or more records to insert.
1024 conform : `bool`, optional
1025 If `False` (`True` is default) perform no checking or conversions,
1026 and assume that ``element`` is a `DimensionElement` instance and
1027 ``data`` is one or more `DimensionRecord` instances of the
1028 appropriate subclass.
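Examples
--------
An illustrative sketch with a hypothetical instrument record:
>>> registry.insertDimensionData("instrument",
...                              {"name": "HypoCam", "visit_max": 999999,
...                               "exposure_max": 999999, "detector_max": 4})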
1029 """
1030 raise NotImplementedError()
1032 @abstractmethod
1033 def syncDimensionData(self, element: Union[DimensionElement, str],
1034 row: Union[Mapping[str, Any], DimensionRecord],
1035 conform: bool = True) -> bool:
1036 """Synchronize the given dimension record with the database, inserting
1037 if it does not already exist and comparing values if it does.
1039 Parameters
1040 ----------
1041 element : `DimensionElement` or `str`
1042 The `DimensionElement` or name thereof that identifies the table
1043 records will be inserted into.
1044 row : `dict` or `DimensionRecord`
1045 The record to insert.
1046 conform : `bool`, optional
1047 If `False` (`True` is default) perform no checking or conversions,
1048 and assume that ``element`` is a `DimensionElement` instance and
1049 ``row`` is a `DimensionRecord` instance of the
1050 appropriate subclass.
1052 Returns
1053 -------
1054 inserted : `bool`
1055 `True` if a new row was inserted, `False` otherwise.
1057 Raises
1058 ------
1059 ConflictingDefinitionError
1060 Raised if the record exists in the database (according to primary
1061 key lookup) but is inconsistent with the given one.
1062 """
1063 raise NotImplementedError()
1065 @abstractmethod
1066 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
1067 ) -> Iterator[DatasetType]:
1068 """Iterate over the dataset types whose names match an expression.
1070 Parameters
1071 ----------
1072 expression : `Any`, optional
1073 An expression that fully or partially identifies the dataset types
1074 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1075 `...` can be used to return all dataset types, and is the default.
1076 See :ref:`daf_butler_dataset_type_expressions` for more
1077 information.
1078 components : `bool`, optional
1079 If `True`, apply all expression patterns to component dataset type
1080 names as well. If `False`, never apply patterns to components.
1081 If `None` (default), apply patterns to components only if their
1082 parent datasets were not matched by the expression.
1083 Fully-specified component datasets (`str` or `DatasetType`
1084 instances) are always included.
1086 Yields
1087 ------
1088 datasetType : `DatasetType`
1089 A `DatasetType` instance whose name matches ``expression``.
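Examples
--------
An illustrative sketch: iterate over dataset types whose names start with
a (hypothetical) prefix:
>>> import re
>>> for datasetType in registry.queryDatasetTypes(re.compile("^calexp")):
...     print(datasetType.name)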
1090 """
1091 raise NotImplementedError()
1093 @abstractmethod
1094 def queryCollections(self, expression: Any = ...,
1095 datasetType: Optional[DatasetType] = None,
1096 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1097 flattenChains: bool = False,
1098 includeChains: Optional[bool] = None) -> Iterator[str]:
1099 """Iterate over the collections whose names match an expression.
1101 Parameters
1102 ----------
1103 expression : `Any`, optional
1104 An expression that identifies the collections to return, such as a
1105 `str` (for full matches), `re.Pattern` (for partial matches), or
1106 iterable thereof. `...` can be used to return all collections,
1107 and is the default. See :ref:`daf_butler_collection_expressions`
1108 for more information.
1109 datasetType : `DatasetType`, optional
1110 If provided, only yield collections that may contain datasets of
1111 this type. This is a conservative approximation in general; it may
1112 yield collections that do not have any such datasets.
1113 collectionTypes : `AbstractSet` [ `CollectionType` ], optional
1114 If provided, only yield collections of these types.
1115 flattenChains : `bool`, optional
1116 If `True` (`False` is default), recursively yield the child
1117 collections of matching `~CollectionType.CHAINED` collections.
1118 includeChains : `bool`, optional
1119 If `True`, yield records for matching `~CollectionType.CHAINED`
1120 collections. Default is the opposite of ``flattenChains``: include
1121 either CHAINED collections or their children, but not both.
1123 Yields
1124 ------
1125 collection : `str`
1126 The name of a collection that matches ``expression``.
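Examples
--------
An illustrative sketch: list `~CollectionType.RUN` collections matching a
hypothetical name prefix:
>>> import re
>>> from lsst.daf.butler import CollectionType
>>> runs = list(registry.queryCollections(re.compile("^u/example/"),
...                                       collectionTypes={CollectionType.RUN}))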
1127 """
1128 raise NotImplementedError()
1130 @abstractmethod
1131 def queryDatasets(self, datasetType: Any, *,
1132 collections: Any = None,
1133 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1134 dataId: Optional[DataId] = None,
1135 where: Optional[str] = None,
1136 findFirst: bool = False,
1137 components: Optional[bool] = None,
1138 bind: Optional[Mapping[str, Any]] = None,
1139 check: bool = True,
1140 **kwargs: Any) -> queries.DatasetQueryResults:
1141 """Query for and iterate over dataset references matching user-provided
1142 criteria.
1144 Parameters
1145 ----------
1146 datasetType
1147 An expression that fully or partially identifies the dataset types
1148 to be queried. Allowed types include `DatasetType`, `str`,
1149 `re.Pattern`, and iterables thereof. The special value `...` can
1150 be used to query all dataset types. See
1151 :ref:`daf_butler_dataset_type_expressions` for more information.
1152 collections : `Any`, optional
1153 An expression that identifies the collections to search, such as a
1154 `str` (for full matches), `re.Pattern` (for partial matches), or
1155 iterable thereof. `...` can be used to search all collections
1156 (actually just all `~CollectionType.RUN` collections, because this
1157 will still find all datasets). If not provided,
1158 ``self.defaults.collections`` is used. See
1159 :ref:`daf_butler_collection_expressions` for more information.
1160 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1161 Dimensions to include in the query (in addition to those used
1162 to identify the queried dataset type(s)), either to constrain
1163 the resulting datasets to those for which a matching dimension
1164 exists, or to relate the dataset type's dimensions to dimensions
1165 referenced by the ``dataId`` or ``where`` arguments.
1166 dataId : `dict` or `DataCoordinate`, optional
1167 A data ID whose key-value pairs are used as equality constraints
1168 in the query.
1169 where : `str`, optional
1170 A string expression similar to a SQL WHERE clause. May involve
1171 any column of a dimension table or (as a shortcut for the primary
1172 key column of a dimension table) dimension name. See
1173 :ref:`daf_butler_dimension_expressions` for more information.
1174 findFirst : `bool`, optional
1175 If `True` (`False` is default), for each result data ID, only
1176 yield one `DatasetRef` of each `DatasetType`, from the first
1177 collection in which a dataset of that dataset type appears
1178 (according to the order of ``collections`` passed in). If `True`,
1179 ``collections`` must not contain regular expressions and may not
1180 be `...`.
1181 components : `bool`, optional
1182 If `True`, apply all dataset expression patterns to component
1183 dataset type names as well. If `False`, never apply patterns to
1184 components. If `None` (default), apply patterns to components only
1185 if their parent datasets were not matched by the expression.
1186 Fully-specified component datasets (`str` or `DatasetType`
1187 instances) are always included.
1188 bind : `Mapping`, optional
1189 Mapping containing literal values that should be injected into the
1190 ``where`` expression, keyed by the identifiers they replace.
1191 check : `bool`, optional
1192 If `True` (default) check the query for consistency before
1193 executing it. This may reject some valid queries that resemble
1194 common mistakes (e.g. queries for visits without specifying an
1195 instrument).
1196 **kwargs
1197 Additional keyword arguments are forwarded to
1198 `DataCoordinate.standardize` when processing the ``dataId``
1199 argument (and may be used to provide a constraining data ID even
1200 when the ``dataId`` argument is `None`).
1202 Returns
1203 -------
1204 refs : `queries.DatasetQueryResults`
1205 Dataset references matching the given query criteria. Nested data
1206 IDs are guaranteed to include values for all implied dimensions
1207 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1208 include dimension records (`DataCoordinate.hasRecords` will be
1209 `False`) unless `~queries.DatasetQueryResults.expanded` is called
1210 on the result object (which returns a new one).
1212 Raises
1213 ------
1214 TypeError
1215 Raised when the arguments are incompatible, such as when a
1216 collection wildcard is passed when ``findFirst`` is `True`, or
1217 when ``collections`` is `None` and ``self.defaults.collections`` is
1218 also `None`.
1220 Notes
1221 -----
1222 When multiple dataset types are queried in a single call, the
1223 results of this operation are equivalent to querying for each dataset
1224 type separately in turn, and no information about the relationships
1225 between datasets of different types is included. In contexts where
1226 that kind of information is important, the recommended pattern is to
1227 use `queryDataIds` to first obtain data IDs (possibly with the
1228 desired dataset types and collections passed as constraints to the
1229 query), and then use multiple (generally much simpler) calls to
1230 `queryDatasets` with the returned data IDs passed as constraints.
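Examples
--------
An illustrative sketch; the dataset type, collection name, and data ID
values are hypothetical:
>>> refs = registry.queryDatasets("calexp",
...                               collections="u/example/run1",
...                               where="detector = 20 AND visit > 900000",
...                               instrument="HypoCam")
>>> refs = refs.expanded()  # attach dimension records to the data IDs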
1231 """
1232 raise NotImplementedError()
1234 @abstractmethod
1235 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
1236 dataId: Optional[DataId] = None,
1237 datasets: Any = None,
1238 collections: Any = None,
1239 where: Optional[str] = None,
1240 components: Optional[bool] = None,
1241 bind: Optional[Mapping[str, Any]] = None,
1242 check: bool = True,
1243 **kwargs: Any) -> queries.DataCoordinateQueryResults:
1244 """Query for data IDs matching user-provided criteria.
1246 Parameters
1247 ----------
1248 dimensions : `Dimension` or `str`, or iterable thereof
1249 The dimensions of the data IDs to yield, as either `Dimension`
1250 instances or `str`. Will be automatically expanded to a complete
1251 `DimensionGraph`.
1252 dataId : `dict` or `DataCoordinate`, optional
1253 A data ID whose key-value pairs are used as equality constraints
1254 in the query.
1255 datasets : `Any`, optional
1256 An expression that fully or partially identifies dataset types
1257 that should constrain the yielded data IDs. For example, including
1258 "raw" here would constrain the yielded ``instrument``,
1259 ``exposure``, ``detector``, and ``physical_filter`` values to only
1260 those for which at least one "raw" dataset exists in
1261 ``collections``. Allowed types include `DatasetType`, `str`,
1262 `re.Pattern`, and iterables thereof. Unlike other dataset type
1263 expressions, ``...`` is not permitted because it doesn't make sense to
1264 constrain data IDs on the existence of *all* datasets.
1265 See :ref:`daf_butler_dataset_type_expressions` for more
1266 information.
1267 collections : `Any`, optional
1268 An expression that identifies the collections to search for
1269 datasets, such as a `str` (for full matches), `re.Pattern` (for
1270 partial matches), or iterable thereof. `...` can be used to search
1271 all collections (actually just all `~CollectionType.RUN`
1272 collections, because this will still find all datasets). If not
1273 provided, ``self.defaults.collections`` is used. Ignored unless
1274 ``datasets`` is also passed. See
1275 :ref:`daf_butler_collection_expressions` for more information.
1276 where : `str`, optional
1277 A string expression similar to a SQL WHERE clause. May involve
1278 any column of a dimension table or (as a shortcut for the primary
1279 key column of a dimension table) dimension name. See
1280 :ref:`daf_butler_dimension_expressions` for more information.
1281 components : `bool`, optional
1282 If `True`, apply all dataset expression patterns to component
1283 dataset type names as well. If `False`, never apply patterns to
1284 components. If `None` (default), apply patterns to components only
1285 if their parent datasets were not matched by the expression.
1286 Fully-specified component datasets (`str` or `DatasetType`
1287 instances) are always included.
1288 bind : `Mapping`, optional
1289 Mapping containing literal values that should be injected into the
1290 ``where`` expression, keyed by the identifiers they replace.
1291 check : `bool`, optional
1292 If `True` (default) check the query for consistency before
1293 executing it. This may reject some valid queries that resemble
1294 common mistakes (e.g. queries for visits without specifying an
1295 instrument).
1296 **kwargs
1297 Additional keyword arguments are forwarded to
1298 `DataCoordinate.standardize` when processing the ``dataId``
1299 argument (and may be used to provide a constraining data ID even
1300 when the ``dataId`` argument is `None`).
1302 Returns
1303 -------
1304 dataIds : `DataCoordinateQueryResults`
1305 Data IDs matching the given query parameters. These are guaranteed
1306 to identify all dimensions (`DataCoordinate.hasFull` returns
1307 `True`), but will not contain `DimensionRecord` objects
1308 (`DataCoordinate.hasRecords` returns `False`). Call
1309 `DataCoordinateQueryResults.expanded` on the returned object to
1310 fetch those (and consider using
1311 `DataCoordinateQueryResults.materialize` on the returned object
1312 first if the expected number of rows is very large). See
1313 documentation for those methods for additional information.
1315 Raises
1316 ------
1317 TypeError
1318 Raised if ``collections`` is `None`, ``self.defaults.collections``
1319 is `None`, and ``datasets`` is not `None`.
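Examples
--------
An illustrative sketch; the dataset type and collection name are
hypothetical:
>>> dataIds = registry.queryDataIds(["exposure", "detector"],
...                                 datasets="raw",
...                                 collections="HypoCam/raw/all")
>>> dataIds = dataIds.expanded()  # fetch dimension records as well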
1320 """
1321 raise NotImplementedError()
1323 @abstractmethod
1324 def queryDimensionRecords(self, element: Union[DimensionElement, str], *,
1325 dataId: Optional[DataId] = None,
1326 datasets: Any = None,
1327 collections: Any = None,
1328 where: Optional[str] = None,
1329 components: Optional[bool] = None,
1330 bind: Optional[Mapping[str, Any]] = None,
1331 check: bool = True,
1332 **kwargs: Any) -> Iterator[DimensionRecord]:
1333 """Query for dimension information matching user-provided criteria.
1335 Parameters
1336 ----------
1337 element : `DimensionElement` or `str`
1338 The dimension element to obtain records for.
1339 dataId : `dict` or `DataCoordinate`, optional
1340 A data ID whose key-value pairs are used as equality constraints
1341 in the query.
1342 datasets : `Any`, optional
1343 An expression that fully or partially identifies dataset types
1344 that should constrain the yielded records. See `queryDataIds` and
1345 :ref:`daf_butler_dataset_type_expressions` for more information.
1346 collections : `Any`, optional
1347 An expression that identifies the collections to search for
1348 datasets, such as a `str` (for full matches), `re.Pattern` (for
1349 partial matches), or iterable thereof. `...` can be used to search
1350 all collections (actually just all `~CollectionType.RUN`
1351 collections, because this will still find all datasets). If not
1352 provided, ``self.defaults.collections`` is used. Ignored unless
1353 ``datasets`` is also passed. See
1354 :ref:`daf_butler_collection_expressions` for more information.
1355 where : `str`, optional
1356 A string expression similar to a SQL WHERE clause. See
1357 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1358 information.
1359 components : `bool`, optional
1360 Whether to apply dataset expressions to components as well.
1361 See `queryDataIds` for more information.
1362 bind : `Mapping`, optional
1363 Mapping containing literal values that should be injected into the
1364 ``where`` expression, keyed by the identifiers they replace.
1365 check : `bool`, optional
1366 If `True` (default) check the query for consistency before
1367 executing it. This may reject some valid queries that resemble
1368 common mistakes (e.g. queries for visits without specifying an
1369 instrument).
1370 **kwargs
1371 Additional keyword arguments are forwarded to
1372 `DataCoordinate.standardize` when processing the ``dataId``
1373 argument (and may be used to provide a constraining data ID even
1374 when the ``dataId`` argument is `None`).
1376 Returns
1377 -------
1378 records : `Iterator` [ `DimensionRecord` ]
1379 Dimension records matching the given query parameters.
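Examples
--------
An illustrative sketch with a hypothetical instrument name:
>>> for record in registry.queryDimensionRecords("detector",
...                                              instrument="HypoCam"):
...     print(record.id)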
1380 """
1381 raise NotImplementedError()
1383 @abstractmethod
1384 def queryDatasetAssociations(
1385 self,
1386 datasetType: Union[str, DatasetType],
1387 collections: Any = ...,
1388 *,
1389 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1390 flattenChains: bool = False,
1391 ) -> Iterator[DatasetAssociation]:
1392 """Iterate over dataset-collection combinations where the dataset is in
1393 the collection.
1395 This method is a temporary placeholder for better support for
1396 association results in `queryDatasets`. It will probably be
1397 removed in the future, and should be avoided in production code
1398 whenever possible.
1400 Parameters
1401 ----------
1402 datasetType : `DatasetType` or `str`
1403 A dataset type object or the name of one.
1404 collections : `Any`, optional
1405 An expression that identifies the collections to search for
1406 datasets, such as a `str` (for full matches), `re.Pattern` (for
1407 partial matches), or iterable thereof. `...` can be used to search
1408 all collections (actually just all `~CollectionType.RUN`
1409 collections, because this will still find all datasets). If not
1410 provided, ``self.defaults.collections`` is used. See
1411 :ref:`daf_butler_collection_expressions` for more information.
1412 collectionTypes : `AbstractSet` [ `CollectionType` ], optional
1413 If provided, only yield associations from collections of these
1414 types.
1415 flattenChains : `bool`, optional
1416 If `True` (`False` is default), search in the children of
1417 `~CollectionType.CHAINED` collections. If `False`, ``CHAINED``
1418 collections are ignored.
1420 Yields
1421 ------
1422 association : `DatasetAssociation`
1423 Object representing the relationship between a single dataset and
1424 a single collection.
1426 Raises
1427 ------
1428 TypeError
1429 Raised if ``collections`` is `None` and
1430 ``self.defaults.collections`` is `None`.
1431 """
1432 raise NotImplementedError()
1434 storageClasses: StorageClassFactory
1435 """All storage classes known to the registry (`StorageClassFactory`).
1436 """