Coverage for python/lsst/daf/butler/_limited_butler.py: 78% (67 statements) — report generated by coverage.py v7.3.1 at 2023-10-02 08:00 +0000.
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("LimitedButler",)
32import logging
33from abc import ABC, abstractmethod
34from collections.abc import Iterable
35from typing import Any, ClassVar
37from deprecated.sphinx import deprecated
38from lsst.resources import ResourcePath
40from ._deferredDatasetHandle import DeferredDatasetHandle
41from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory
43log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface, sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """
59 @abstractmethod
60 def isWriteable(self) -> bool:
61 """Return `True` if this `Butler` supports write operations."""
62 raise NotImplementedError()
64 # TODO: remove on DM-40067.
65 @deprecated(
66 reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
67 " Please use Butler.put(). Will be removed after v26.0.",
68 version="v26.0",
69 category=FutureWarning,
70 )
71 def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
72 """Store a dataset that already has a UUID and ``RUN`` collection.
74 Parameters
75 ----------
76 obj : `object`
77 The dataset.
78 ref : `DatasetRef`
79 Resolved reference for a not-yet-stored dataset.
81 Returns
82 -------
83 ref : `DatasetRef`
84 The same as the given, for convenience and symmetry with
85 `Butler.put`.
87 Raises
88 ------
89 TypeError
90 Raised if the butler is read-only.
92 Notes
93 -----
94 Whether this method inserts the given dataset into a ``Registry`` is
95 implementation defined (some `LimitedButler` subclasses do not have a
96 `Registry`), but it always adds the dataset to a `Datastore`, and the
97 given ``ref.id`` and ``ref.run`` are always preserved.
98 """
99 return self.put(obj, ref)
101 @abstractmethod
102 def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
103 """Store a dataset that already has a UUID and ``RUN`` collection.
105 Parameters
106 ----------
107 obj : `object`
108 The dataset.
109 ref : `DatasetRef`
110 Resolved reference for a not-yet-stored dataset.
112 Returns
113 -------
114 ref : `DatasetRef`
115 The same as the given, for convenience and symmetry with
116 `Butler.put`.
118 Raises
119 ------
120 TypeError
121 Raised if the butler is read-only.
123 Notes
124 -----
125 Whether this method inserts the given dataset into a ``Registry`` is
126 implementation defined (some `LimitedButler` subclasses do not have a
127 `Registry`), but it always adds the dataset to a `Datastore`, and the
128 given ``ref.id`` and ``ref.run`` are always preserved.
129 """
130 raise NotImplementedError()
132 def get(
133 self,
134 ref: DatasetRef,
135 /,
136 *,
137 parameters: dict[str, Any] | None = None,
138 storageClass: StorageClass | str | None = None,
139 ) -> Any:
140 """Retrieve a stored dataset.
142 Parameters
143 ----------
144 ref: `DatasetRef`
145 A resolved `DatasetRef` directly associated with a dataset.
146 parameters : `dict`
147 Additional StorageClass-defined options to control reading,
148 typically used to efficiently read only a subset of the dataset.
149 storageClass : `StorageClass` or `str`, optional
150 The storage class to be used to override the Python type
151 returned by this method. By default the returned type matches
152 the dataset type definition for this dataset. Specifying a
153 read `StorageClass` can force a different type to be returned.
154 This type must be compatible with the original type.
156 Returns
157 -------
158 obj : `object`
159 The dataset.
161 Raises
162 ------
163 AmbiguousDatasetError
164 Raised if the supplied `DatasetRef` is unresolved.
166 Notes
167 -----
168 In a `LimitedButler` the only allowable way to specify a dataset is
169 to use a resolved `DatasetRef`. Subclasses can support more options.
170 """
171 log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
172 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
174 # TODO: remove on DM-40067.
175 @deprecated(
176 reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
177 " Please use Butler.get(). Will be removed after v26.0.",
178 version="v26.0",
179 category=FutureWarning,
180 )
181 def getDirect(
182 self,
183 ref: DatasetRef,
184 *,
185 parameters: dict[str, Any] | None = None,
186 storageClass: str | StorageClass | None = None,
187 ) -> Any:
188 """Retrieve a stored dataset.
190 Parameters
191 ----------
192 ref : `DatasetRef`
193 Resolved reference to an already stored dataset.
194 parameters : `dict`
195 Additional StorageClass-defined options to control reading,
196 typically used to efficiently read only a subset of the dataset.
197 storageClass : `StorageClass` or `str`, optional
198 The storage class to be used to override the Python type
199 returned by this method. By default the returned type matches
200 the dataset type definition for this dataset. Specifying a
201 read `StorageClass` can force a different type to be returned.
202 This type must be compatible with the original type.
204 Returns
205 -------
206 obj : `object`
207 The dataset.
208 """
209 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
211 # TODO: remove on DM-40067.
212 @deprecated(
213 reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
214 "Please use Butler.getDeferred(). Will be removed after v26.0.",
215 version="v26.0",
216 category=FutureWarning,
217 )
218 def getDirectDeferred(
219 self,
220 ref: DatasetRef,
221 *,
222 parameters: dict[str, Any] | None = None,
223 storageClass: str | StorageClass | None = None,
224 ) -> DeferredDatasetHandle:
225 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
226 from a resolved `DatasetRef`.
228 Parameters
229 ----------
230 ref : `DatasetRef`
231 Resolved reference to an already stored dataset.
232 parameters : `dict`
233 Additional StorageClass-defined options to control reading,
234 typically used to efficiently read only a subset of the dataset.
235 storageClass : `StorageClass` or `str`, optional
236 The storage class to be used to override the Python type
237 returned by this method. By default the returned type matches
238 the dataset type definition for this dataset. Specifying a
239 read `StorageClass` can force a different type to be returned.
240 This type must be compatible with the original type.
242 Returns
243 -------
244 obj : `DeferredDatasetHandle`
245 A handle which can be used to retrieve a dataset at a later time.
246 """
247 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
249 def getDeferred(
250 self,
251 ref: DatasetRef,
252 /,
253 *,
254 parameters: dict[str, Any] | None = None,
255 storageClass: str | StorageClass | None = None,
256 ) -> DeferredDatasetHandle:
257 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
258 after an immediate registry lookup.
260 Parameters
261 ----------
262 ref : `DatasetRef`
263 For the default implementation of a `LimitedButler`, the only
264 acceptable parameter is a resolved `DatasetRef`.
265 parameters : `dict`
266 Additional StorageClass-defined options to control reading,
267 typically used to efficiently read only a subset of the dataset.
268 storageClass : `StorageClass` or `str`, optional
269 The storage class to be used to override the Python type
270 returned by this method. By default the returned type matches
271 the dataset type definition for this dataset. Specifying a
272 read `StorageClass` can force a different type to be returned.
273 This type must be compatible with the original type.
275 Returns
276 -------
277 obj : `DeferredDatasetHandle`
278 A handle which can be used to retrieve a dataset at a later time.
280 Notes
281 -----
282 In a `LimitedButler` the only allowable way to specify a dataset is
283 to use a resolved `DatasetRef`. Subclasses can support more options.
284 """
285 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
287 def get_datastore_names(self) -> tuple[str, ...]:
288 """Return the names of the datastores associated with this butler.
290 Returns
291 -------
292 names : `tuple` [`str`, ...]
293 The names of the datastores.
294 """
295 return self._datastore.names
297 def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
298 """Return the defined root URIs for all registered datastores.
300 Returns
301 -------
302 roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
303 A mapping from datastore name to datastore root URI. The root
304 can be `None` if the datastore does not have any concept of a root
305 URI.
306 """
307 return self._datastore.roots
309 def getURIs(
310 self,
311 ref: DatasetRef,
312 /,
313 *,
314 predict: bool = False,
315 ) -> DatasetRefURIs:
316 """Return the URIs associated with the dataset.
318 Parameters
319 ----------
320 ref : `DatasetRef`
321 A `DatasetRef` for which URIs are requested.
322 predict : `bool`
323 If `True`, allow URIs to be returned of datasets that have not
324 been written.
326 Returns
327 -------
328 uris : `DatasetRefURIs`
329 The URI to the primary artifact associated with this dataset (if
330 the dataset was disassembled within the datastore this may be
331 `None`), and the URIs to any components associated with the dataset
332 artifact (can be empty if there are no components).
333 """
334 return self._datastore.getURIs(ref, predict)
336 def getURI(
337 self,
338 ref: DatasetRef,
339 /,
340 *,
341 predict: bool = False,
342 ) -> ResourcePath:
343 """Return the URI to the Dataset.
345 Parameters
346 ----------
347 ref : `DatasetRef`
348 A `DatasetRef` for which a single URI is requested.
349 predict : `bool`
350 If `True`, allow URIs to be returned of datasets that have not
351 been written.
353 Returns
354 -------
355 uri : `lsst.resources.ResourcePath`
356 URI pointing to the Dataset within the datastore. If the
357 Dataset does not exist in the datastore, and if ``predict`` is
358 `True`, the URI will be a prediction and will include a URI
359 fragment "#predicted".
360 If the datastore does not have entities that relate well
361 to the concept of a URI the returned URI string will be
362 descriptive. The returned URI is not guaranteed to be obtainable.
364 Raises
365 ------
366 RuntimeError
367 Raised if a URI is requested for a dataset that consists of
368 multiple artifacts.
369 """
370 primary, components = self.getURIs(ref, predict=predict)
372 if primary is None or components:
373 raise RuntimeError(
374 f"Dataset ({ref}) includes distinct URIs for components. "
375 "Use LimitedButler.getURIs() instead."
376 )
377 return primary
379 def get_many_uris(
380 self,
381 refs: Iterable[DatasetRef],
382 predict: bool = False,
383 allow_missing: bool = False,
384 ) -> dict[DatasetRef, DatasetRefURIs]:
385 """Return URIs associated with many datasets.
387 Parameters
388 ----------
389 refs : iterable of `DatasetIdRef`
390 References to the required datasets.
391 predict : `bool`, optional
392 If `True`, allow URIs to be returned of datasets that have not
393 been written.
394 allow_missing : `bool`
395 If `False`, and ``predict`` is `False`, will raise if a
396 `DatasetRef` does not exist.
398 Returns
399 -------
400 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
401 A dict of primary and component URIs, indexed by the passed-in
402 refs.
404 Raises
405 ------
406 FileNotFoundError
407 A URI has been requested for a dataset that does not exist and
408 guessing is not allowed.
410 Notes
411 -----
412 In file-based datastores, get_many_uris does not check that the file is
413 present. It assumes that if datastore is aware of the file then it
414 actually exists.
415 """
416 return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)
418 def stored(self, ref: DatasetRef) -> bool:
419 """Indicate whether the dataset's artifacts are present in the
420 Datastore.
422 Parameters
423 ----------
424 ref : `DatasetRef`
425 Resolved reference to a dataset.
427 Returns
428 -------
429 stored : `bool`
430 Whether the dataset artifact exists in the datastore and can be
431 retrieved.
432 """
433 return self._datastore.exists(ref)
435 def stored_many(
436 self,
437 refs: Iterable[DatasetRef],
438 ) -> dict[DatasetRef, bool]:
439 """Check the datastore for artifact existence of multiple datasets
440 at once.
442 Parameters
443 ----------
444 refs : iterable of `DatasetRef`
445 The datasets to be checked.
447 Returns
448 -------
449 existence : `dict` of [`DatasetRef`, `bool`]
450 Mapping from given dataset refs to boolean indicating artifact
451 existence.
452 """
453 return self._datastore.mexists(refs)
455 # TODO: remove on DM-40079.
456 @deprecated(
457 reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
458 "Will be removed after v26.0.",
459 version="v26.0",
460 category=FutureWarning,
461 )
462 def datasetExistsDirect(self, ref: DatasetRef) -> bool:
463 """Return `True` if a dataset is actually present in the Datastore.
465 Parameters
466 ----------
467 ref : `DatasetRef`
468 Resolved reference to a dataset.
470 Returns
471 -------
472 exists : `bool`
473 Whether the dataset exists in the Datastore.
474 """
475 return self.stored(ref)
477 def markInputUnused(self, ref: DatasetRef) -> None:
478 """Indicate that a predicted input was not actually used when
479 processing a `Quantum`.
481 Parameters
482 ----------
483 ref : `DatasetRef`
484 Reference to the unused dataset.
486 Notes
487 -----
488 By default, a dataset is considered "actually used" if it is accessed
489 via `getDirect` or a handle to it is obtained via `getDirectDeferred`
490 (even if the handle is not used). This method must be called after one
491 of those in order to remove the dataset from the actual input list.
493 This method does nothing for butlers that do not store provenance
494 information (which is the default implementation provided by the base
495 class).
496 """
497 pass
499 @abstractmethod
500 def pruneDatasets(
501 self,
502 refs: Iterable[DatasetRef],
503 *,
504 disassociate: bool = True,
505 unstore: bool = False,
506 tags: Iterable[str] = (),
507 purge: bool = False,
508 ) -> None:
509 """Remove one or more datasets from a collection and/or storage.
511 Parameters
512 ----------
513 refs : `~collections.abc.Iterable` of `DatasetRef`
514 Datasets to prune. These must be "resolved" references (not just
515 a `DatasetType` and data ID).
516 disassociate : `bool`, optional
517 Disassociate pruned datasets from ``tags``, or from all collections
518 if ``purge=True``.
519 unstore : `bool`, optional
520 If `True` (`False` is default) remove these datasets from all
521 datastores known to this butler. Note that this will make it
522 impossible to retrieve these datasets even via other collections.
523 Datasets that are already not stored are ignored by this option.
524 tags : `~collections.abc.Iterable` [ `str` ], optional
525 `~CollectionType.TAGGED` collections to disassociate the datasets
526 from. Ignored if ``disassociate`` is `False` or ``purge`` is
527 `True`.
528 purge : `bool`, optional
529 If `True` (`False` is default), completely remove the dataset from
530 the `Registry`. To prevent accidental deletions, ``purge`` may
531 only be `True` if all of the following conditions are met:
533 - ``disassociate`` is `True`;
534 - ``unstore`` is `True`.
536 This mode may remove provenance information from datasets other
537 than those provided, and should be used with extreme care.
539 Raises
540 ------
541 TypeError
542 Raised if the butler is read-only, if no collection was provided,
543 or the conditions for ``purge=True`` were not met.
544 """
545 raise NotImplementedError()
547 @property
548 @abstractmethod
549 def dimensions(self) -> DimensionUniverse:
550 """Structure managing all dimensions recognized by this data
551 repository (`DimensionUniverse`).
552 """
553 raise NotImplementedError()
555 # TODO: remove on DM-40080.
556 @property
557 @deprecated(
558 reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
559 "relevant functionality. Will be removed after v26.0.",
560 version="v26.0",
561 category=FutureWarning,
562 )
563 def datastore(self) -> Datastore:
564 """The object that manages actual dataset storage. (`Datastore`)"""
565 return self._datastore
567 _datastore: Datastore
568 """The object that manages actual dataset storage (`Datastore`)."""
570 storageClasses: StorageClassFactory
571 """An object that maps known storage class names to objects that fully
572 describe them (`StorageClassFactory`).
573 """