Coverage for python/lsst/daf/butler/_limited_butler.py: 81%
43 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-02 02:16 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("LimitedButler",)
26import logging
27from abc import ABC, abstractmethod
28from typing import Any, ClassVar, Dict, Iterable, Optional, Union
30from deprecated.sphinx import deprecated
32from ._deferredDatasetHandle import DeferredDatasetHandle
33from .core import DatasetRef, Datastore, DimensionUniverse, StorageClass, StorageClassFactory
35log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: delegate to the modern entry point so behavior
        # stays identical while callers migrate.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Deprecated alias: goes straight to the datastore, exactly as the
        # modern get() does for a resolved ref.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        # Intentionally a no-op here; the docstring serves as the body, so no
        # `pass` statement is needed.

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """