Coverage for python/lsst/daf/butler/_limited_butler.py: 70%
38 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-17 02:01 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("LimitedButler",)
26import logging
27from abc import ABC, abstractmethod
28from typing import Any, ClassVar, Dict, Iterable, Optional, Union
30from ._deferredDatasetHandle import DeferredDatasetHandle
31from .core import (
32 AmbiguousDatasetError,
33 DatasetRef,
34 Datastore,
35 DimensionUniverse,
36 StorageClass,
37 StorageClassFactory,
38)
40log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`, optional
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # Delegate directly to the datastore; no registry lookup is needed
        # because the reference is already resolved.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`, optional
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # An unresolved reference cannot be retrieved later, so fail eagerly
        # here rather than when the handle is eventually used.
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """