Coverage for python/lsst/daf/butler/_limited_butler.py: 76%
53 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

from lsst.resources import ResourcePath

from ._dataset_ref import DatasetRef
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._storage_class import StorageClass, StorageClassFactory
from .datastore import DatasetRefURIs, Datastore
from .dimensions import DimensionUniverse

log = logging.getLogger(__name__)


class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given ``ref``, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
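
        Examples
        --------
        A minimal sketch, assuming ``butler`` is a writeable concrete
        `LimitedButler` and ``ref`` is a resolved `DatasetRef` for a
        not-yet-stored dataset (both names are hypothetical):

        >>> stored_ref = butler.put(in_memory_obj, ref)  # doctest: +SKIP
        >>> assert stored_ref.id == ref.id  # doctest: +SKIP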
94 """
95 raise NotImplementedError()
97 def get(
98 self,
99 ref: DatasetRef,
100 /,
101 *,
102 parameters: dict[str, Any] | None = None,
103 storageClass: StorageClass | str | None = None,
104 ) -> Any:
105 """Retrieve a stored dataset.
107 Parameters
108 ----------
109 ref : `DatasetRef`
110 A resolved `DatasetRef` directly associated with a dataset.
111 parameters : `dict`
112 Additional StorageClass-defined options to control reading,
113 typically used to efficiently read only a subset of the dataset.
114 storageClass : `StorageClass` or `str`, optional
115 The storage class to be used to override the Python type
116 returned by this method. By default the returned type matches
117 the dataset type definition for this dataset. Specifying a
118 read `StorageClass` can force a different type to be returned.
119 This type must be compatible with the original type.
121 Returns
122 -------
123 obj : `object`
124 The dataset.
126 Raises
127 ------
128 AmbiguousDatasetError
129 Raised if the supplied `DatasetRef` is unresolved.
131 Notes
132 -----
133 In a `LimitedButler` the only allowable way to specify a dataset is
134 to use a resolved `DatasetRef`. Subclasses can support more options.
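
        Examples
        --------
        A sketch, assuming a hypothetical ``butler`` and a resolved, stored
        ``ref``; the valid ``parameters`` keys are defined by the dataset's
        storage class (``"bbox"`` below is purely illustrative):

        >>> obj = butler.get(ref)  # doctest: +SKIP
        >>> cutout = butler.get(ref, parameters={"bbox": bbox})  # doctest: +SKIP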
135 """
136 log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
137 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
139 def getDeferred(
140 self,
141 ref: DatasetRef,
142 /,
143 *,
144 parameters: dict[str, Any] | None = None,
145 storageClass: str | StorageClass | None = None,
146 ) -> DeferredDatasetHandle:
147 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
148 after an immediate registry lookup.
150 Parameters
151 ----------
152 ref : `DatasetRef`
153 For the default implementation of a `LimitedButler`, the only
154 acceptable parameter is a resolved `DatasetRef`.
155 parameters : `dict`
156 Additional StorageClass-defined options to control reading,
157 typically used to efficiently read only a subset of the dataset.
158 storageClass : `StorageClass` or `str`, optional
159 The storage class to be used to override the Python type
160 returned by this method. By default the returned type matches
161 the dataset type definition for this dataset. Specifying a
162 read `StorageClass` can force a different type to be returned.
163 This type must be compatible with the original type.
165 Returns
166 -------
167 obj : `DeferredDatasetHandle`
168 A handle which can be used to retrieve a dataset at a later time.
170 Notes
171 -----
172 In a `LimitedButler` the only allowable way to specify a dataset is
173 to use a resolved `DatasetRef`. Subclasses can support more options.
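
        Examples
        --------
        A sketch of deferred reading (hypothetical ``butler`` and ``ref``);
        the dataset is only read when ``get`` is called on the handle:

        >>> handle = butler.getDeferred(ref)  # doctest: +SKIP
        >>> obj = handle.get()  # doctest: +SKIP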
174 """
175 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
177 def get_datastore_names(self) -> tuple[str, ...]:
178 """Return the names of the datastores associated with this butler.
180 Returns
181 -------
182 names : `tuple` [`str`, ...]
183 The names of the datastores.
184 """
185 return self._datastore.names
187 def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
188 """Return the defined root URIs for all registered datastores.
190 Returns
191 -------
192 roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
193 A mapping from datastore name to datastore root URI. The root
194 can be `None` if the datastore does not have any concept of a root
195 URI.
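
        Examples
        --------
        A sketch of enumerating datastores (hypothetical ``butler``):

        >>> for name, root in butler.get_datastore_roots().items():  # doctest: +SKIP
        ...     print(name, root)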
196 """
197 return self._datastore.roots
199 def getURIs(
200 self,
201 ref: DatasetRef,
202 /,
203 *,
204 predict: bool = False,
205 ) -> DatasetRefURIs:
206 """Return the URIs associated with the dataset.
208 Parameters
209 ----------
210 ref : `DatasetRef`
211 A `DatasetRef` for which URIs are requested.
212 predict : `bool`
213 If `True`, allow URIs to be returned of datasets that have not
214 been written.
216 Returns
217 -------
218 uris : `DatasetRefURIs`
219 The URI to the primary artifact associated with this dataset (if
220 the dataset was disassembled within the datastore this may be
221 `None`), and the URIs to any components associated with the dataset
222 artifact (can be empty if there are no components).
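
        Examples
        --------
        A sketch (hypothetical ``butler`` and ``ref``); the result can be
        unpacked into the primary URI and the component URIs:

        >>> primary, components = butler.getURIs(ref)  # doctest: +SKIP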
223 """
224 return self._datastore.getURIs(ref, predict)
226 def getURI(
227 self,
228 ref: DatasetRef,
229 /,
230 *,
231 predict: bool = False,
232 ) -> ResourcePath:
233 """Return the URI to the Dataset.
235 Parameters
236 ----------
237 ref : `DatasetRef`
238 A `DatasetRef` for which a single URI is requested.
239 predict : `bool`
240 If `True`, allow URIs to be returned of datasets that have not
241 been written.
243 Returns
244 -------
245 uri : `lsst.resources.ResourcePath`
246 URI pointing to the Dataset within the datastore. If the
247 Dataset does not exist in the datastore, and if ``predict`` is
248 `True`, the URI will be a prediction and will include a URI
249 fragment "#predicted".
250 If the datastore does not have entities that relate well
251 to the concept of a URI the returned URI string will be
252 descriptive. The returned URI is not guaranteed to be obtainable.
254 Raises
255 ------
256 RuntimeError
257 Raised if a URI is requested for a dataset that consists of
258 multiple artifacts.
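
        Examples
        --------
        A sketch (hypothetical ``butler`` and ``ref``); with ``predict=True``
        the URI of an unwritten dataset carries the "#predicted" fragment:

        >>> uri = butler.getURI(ref, predict=True)  # doctest: +SKIP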
259 """
260 primary, components = self.getURIs(ref, predict=predict)
262 if primary is None or components:
263 raise RuntimeError(
264 f"Dataset ({ref}) includes distinct URIs for components. "
265 "Use LimitedButler.getURIs() instead."
266 )
267 return primary
269 def get_many_uris(
270 self,
271 refs: Iterable[DatasetRef],
272 predict: bool = False,
273 allow_missing: bool = False,
274 ) -> dict[DatasetRef, DatasetRefURIs]:
275 """Return URIs associated with many datasets.
277 Parameters
278 ----------
279 refs : iterable of `DatasetIdRef`
280 References to the required datasets.
281 predict : `bool`, optional
282 If `True`, allow URIs to be returned of datasets that have not
283 been written.
284 allow_missing : `bool`
285 If `False`, and ``predict`` is `False`, will raise if a
286 `DatasetRef` does not exist.
288 Returns
289 -------
290 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
291 A dict of primary and component URIs, indexed by the passed-in
292 refs.
294 Raises
295 ------
296 FileNotFoundError
297 A URI has been requested for a dataset that does not exist and
298 guessing is not allowed.
300 Notes
301 -----
302 In file-based datastores, get_many_uris does not check that the file is
303 present. It assumes that if datastore is aware of the file then it
304 actually exists.
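
        Examples
        --------
        A sketch of a bulk URI lookup (hypothetical ``butler`` and ``refs``);
        each value unpacks like the result of `getURIs`:

        >>> uris = butler.get_many_uris(refs, predict=True)  # doctest: +SKIP
        >>> for ref, datastore_uris in uris.items():  # doctest: +SKIP
        ...     primary, components = datastore_uris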
305 """
306 return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)
308 def stored(self, ref: DatasetRef) -> bool:
309 """Indicate whether the dataset's artifacts are present in the
310 Datastore.
312 Parameters
313 ----------
314 ref : `DatasetRef`
315 Resolved reference to a dataset.
317 Returns
318 -------
319 stored : `bool`
320 Whether the dataset artifact exists in the datastore and can be
321 retrieved.
322 """
323 return self._datastore.exists(ref)
325 def stored_many(
326 self,
327 refs: Iterable[DatasetRef],
328 ) -> dict[DatasetRef, bool]:
329 """Check the datastore for artifact existence of multiple datasets
330 at once.
332 Parameters
333 ----------
334 refs : iterable of `DatasetRef`
335 The datasets to be checked.
337 Returns
338 -------
339 existence : `dict` of [`DatasetRef`, `bool`]
340 Mapping from given dataset refs to boolean indicating artifact
341 existence.
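
        Examples
        --------
        A sketch of a bulk existence check, preferable to calling `stored`
        in a loop (hypothetical ``butler`` and ``refs``):

        >>> existence = butler.stored_many(refs)  # doctest: +SKIP
        >>> missing = [r for r, ok in existence.items() if not ok]  # doctest: +SKIP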
342 """
343 return self._datastore.mexists(refs)
345 def markInputUnused(self, ref: DatasetRef) -> None:
346 """Indicate that a predicted input was not actually used when
347 processing a `Quantum`.
349 Parameters
350 ----------
351 ref : `DatasetRef`
352 Reference to the unused dataset.
354 Notes
355 -----
356 By default, a dataset is considered "actually used" if it is accessed
357 via `get` or a handle to it is obtained via `getDeferred`
358 (even if the handle is not used). This method must be called after one
359 of those in order to remove the dataset from the actual input list.
361 This method does nothing for butlers that do not store provenance
362 information (which is the default implementation provided by the base
363 class).
364 """
365 pass
367 @abstractmethod
368 def pruneDatasets(
369 self,
370 refs: Iterable[DatasetRef],
371 *,
372 disassociate: bool = True,
373 unstore: bool = False,
374 tags: Iterable[str] = (),
375 purge: bool = False,
376 ) -> None:
377 """Remove one or more datasets from a collection and/or storage.
379 Parameters
380 ----------
381 refs : `~collections.abc.Iterable` of `DatasetRef`
382 Datasets to prune. These must be "resolved" references (not just
383 a `DatasetType` and data ID).
384 disassociate : `bool`, optional
385 Disassociate pruned datasets from ``tags``, or from all collections
386 if ``purge=True``.
387 unstore : `bool`, optional
388 If `True` (`False` is default) remove these datasets from all
389 datastores known to this butler. Note that this will make it
390 impossible to retrieve these datasets even via other collections.
391 Datasets that are already not stored are ignored by this option.
392 tags : `~collections.abc.Iterable` [ `str` ], optional
393 `~CollectionType.TAGGED` collections to disassociate the datasets
394 from. Ignored if ``disassociate`` is `False` or ``purge`` is
395 `True`.
396 purge : `bool`, optional
397 If `True` (`False` is default), completely remove the dataset from
398 the `Registry`. To prevent accidental deletions, ``purge`` may
399 only be `True` if all of the following conditions are met:
401 - ``disassociate`` is `True`;
402 - ``unstore`` is `True`.
404 This mode may remove provenance information from datasets other
405 than those provided, and should be used with extreme care.
407 Raises
408 ------
409 TypeError
410 Raised if the butler is read-only, if no collection was provided,
411 or the conditions for ``purge=True`` were not met.
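
        Examples
        --------
        A sketch of complete removal, which requires both safety flags
        (hypothetical ``butler`` and ``refs``):

        >>> butler.pruneDatasets(  # doctest: +SKIP
        ...     refs, disassociate=True, unstore=True, purge=True
        ... )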
412 """
413 raise NotImplementedError()
415 @property
416 @abstractmethod
417 def dimensions(self) -> DimensionUniverse:
418 """Structure managing all dimensions recognized by this data
419 repository (`DimensionUniverse`).
420 """
421 raise NotImplementedError()
423 _datastore: Datastore
424 """The object that manages actual dataset storage (`Datastore`)."""
426 storageClasses: StorageClassFactory
427 """An object that maps known storage class names to objects that fully
428 describe them (`StorageClassFactory`).
429 """