Coverage for python/lsst/daf/butler/_limited_butler.py: 80%
49 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-23 09:30 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("LimitedButler",)
26import logging
27from abc import ABC, abstractmethod
28from collections.abc import Iterable
29from typing import Any, ClassVar
31from deprecated.sphinx import deprecated
33from ._deferredDatasetHandle import DeferredDatasetHandle
34from .core import DatasetRef, Datastore, DimensionUniverse, StorageClass, StorageClassFactory
36log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: put() accepts a resolved ref directly now.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        # Delegate straight to the datastore; a LimitedButler has no
        # registry-based lookup to perform first.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        # No actual I/O happens here; the handle defers the datastore read
        # until the caller asks for the dataset.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self.datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
        """
        # Bulk form of stored(); lets the datastore batch its existence
        # checks rather than testing each ref individually.
        return self.datastore.mexists(refs)

    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        # Deprecated alias for stored().
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `get` or a handle to it is obtained via `getDeferred` (even if the
        handle is not used). This method must be called after one of those in
        order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """