Coverage for python/lsst/daf/butler/_limited_butler.py: 78%
67 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("LimitedButler",)
26import logging
27from abc import ABC, abstractmethod
28from collections.abc import Iterable
29from typing import Any, ClassVar
31from deprecated.sphinx import deprecated
32from lsst.resources import ResourcePath
34from ._deferredDatasetHandle import DeferredDatasetHandle
35from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory
37log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: `put` now accepts a resolved ref directly.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a root
            URI.
        """
        return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact (can be empty if there are no components).
        """
        return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
        """
        primary, components = self.getURIs(ref, predict=predict)

        # A single URI only makes sense when the dataset is stored as one
        # undissassembled artifact; otherwise the caller must use getURIs().
        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, get_many_uris does not check that the file is
        present. It assumes that if datastore is aware of the file then it
        actually exists.
        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
        """
        return self._datastore.mexists(refs)

    # TODO: remove on DM-40079.
    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    # TODO: remove on DM-40080.
    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage. (`Datastore`)"""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """