Coverage for python/lsst/daf/butler/_limited_butler.py: 79%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

from deprecated.sphinx import deprecated
from lsst.resources import ResourcePath

from ._dataset_ref import DatasetRef
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._storage_class import StorageClass, StorageClassFactory
from .datastore import DatasetRefURIs, Datastore
from .dimensions import DimensionUniverse

log = logging.getLogger(__name__)


class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
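
        Examples
        --------
        A minimal sketch, assuming ``butler`` is a writeable `LimitedButler`
        and ``ref`` is a resolved `DatasetRef` for a dataset that has not yet
        been stored (both names are illustrative)::

            stored_ref = butler.put(in_memory_object, ref)
            assert stored_ref == ref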
132 """
133 raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
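
        Examples
        --------
        A sketch of a full read and a subset read, assuming ``butler`` and a
        resolved ``ref`` exist; the available ``parameters`` keys are defined
        by the dataset's storage class (the ``bbox`` key here is
        illustrative)::

            obj = butler.get(ref)
            cutout = butler.get(ref, parameters={"bbox": bbox})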
173 """
174 log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
175 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` that can later retrieve a dataset
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` that can later retrieve a dataset
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
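
        Examples
        --------
        A sketch that defers the actual read until the handle is used,
        assuming ``butler`` and a resolved ``ref`` exist (names
        illustrative)::

            handle = butler.getDeferred(ref)
            # ... later, when the dataset is actually needed:
            obj = handle.get()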
287 """
288 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a root
            URI.
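
        Examples
        --------
        A sketch that reports each datastore root, assuming ``butler`` exists
        and using `~lsst.resources.ResourcePath.geturl` to render the URI::

            for name, root in butler.get_datastore_roots().items():
                print(name, "<no root>" if root is None else root.geturl())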
309 """
310 return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact (can be empty if there are no components).
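
        Examples
        --------
        A sketch that unpacks the result, assuming ``butler`` and a resolved
        ``ref``; the ``primaryURI`` and ``componentURIs`` attributes follow
        the `DatasetRefURIs` structure described above::

            uris = butler.getURIs(ref)
            if uris.primaryURI is not None:
                print("primary:", uris.primaryURI)
            for component, uri in uris.componentURIs.items():
                print(component, uri)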
336 """
337 return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
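
        Examples
        --------
        A sketch for a dataset stored as a single artifact, assuming
        ``butler`` and a resolved ``ref``; disassembled datasets raise
        `RuntimeError` and require `getURIs` instead::

            uri = butler.getURI(ref)
            print(uri.geturl())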
372 """
373 primary, components = self.getURIs(ref, predict=predict)
375 if primary is None or components:
376 raise RuntimeError(
377 f"Dataset ({ref}) includes distinct URIs for components. "
378 "Use LimitedButler.getURIs() instead."
379 )
380 return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned for datasets that have not
            been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.

        Notes
        -----
        In file-based datastores, `get_many_uris` does not check that the
        file is present. It assumes that if the datastore is aware of the
        file then it actually exists.
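
        Examples
        --------
        A sketch over several resolved refs (``refs`` is illustrative),
        predicting URIs for datasets not yet written::

            for ref, uris in butler.get_many_uris(refs, predict=True).items():
                print(ref, uris.primaryURI)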
418 """
419 return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
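
        Examples
        --------
        A sketch that filters a collection of resolved refs down to those
        whose artifacts are actually stored (``refs`` is illustrative)::

            existence = butler.stored_many(refs)
            present = [ref for ref, stored in existence.items() if stored]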
455 """
456 return self._datastore.mexists(refs)

    # TODO: remove on DM-40079.
    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
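
        Examples
        --------
        A sketch of typical use during task execution (``useful`` is an
        illustrative predicate, not part of this API)::

            obj = butler.get(ref)
            if not useful(obj):
                butler.markInputUnused(ref)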
499 """
500 pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
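
        Examples
        --------
        A sketch that fully removes datasets, assuming ``butler`` is
        writeable and ``refs`` holds resolved references (names
        illustrative); note that ``purge=True`` requires both
        ``disassociate=True`` and ``unstore=True``::

            butler.pruneDatasets(refs, disassociate=True, unstore=True, purge=True)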
547 """
548 raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    # TODO: remove on DM-40080.
    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage (`Datastore`)."""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """