Coverage for python/lsst/daf/butler/_limited_butler.py: 78%
67 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("LimitedButler",)
26import logging
27from abc import ABC, abstractmethod
28from collections.abc import Iterable
29from typing import Any, ClassVar
31from deprecated.sphinx import deprecated
32from lsst.resources import ResourcePath
34from ._deferredDatasetHandle import DeferredDatasetHandle
35from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    # Class-level marker distinguishing this interface from the retired
    # Generation 2 Butler; see the docstring below for its lifetime.
    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """
53 @abstractmethod
54 def isWriteable(self) -> bool:
55 """Return `True` if this `Butler` supports write operations."""
56 raise NotImplementedError()
58 @deprecated(
59 reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
60 " Please use Butler.put(). Will be removed after v27.0.",
61 version="v26.0",
62 category=FutureWarning,
63 )
64 def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
65 """Store a dataset that already has a UUID and ``RUN`` collection.
67 Parameters
68 ----------
69 obj : `object`
70 The dataset.
71 ref : `DatasetRef`
72 Resolved reference for a not-yet-stored dataset.
74 Returns
75 -------
76 ref : `DatasetRef`
77 The same as the given, for convenience and symmetry with
78 `Butler.put`.
80 Raises
81 ------
82 TypeError
83 Raised if the butler is read-only.
85 Notes
86 -----
87 Whether this method inserts the given dataset into a ``Registry`` is
88 implementation defined (some `LimitedButler` subclasses do not have a
89 `Registry`), but it always adds the dataset to a `Datastore`, and the
90 given ``ref.id`` and ``ref.run`` are always preserved.
91 """
92 return self.put(obj, ref)
94 @abstractmethod
95 def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
96 """Store a dataset that already has a UUID and ``RUN`` collection.
98 Parameters
99 ----------
100 obj : `object`
101 The dataset.
102 ref : `DatasetRef`
103 Resolved reference for a not-yet-stored dataset.
105 Returns
106 -------
107 ref : `DatasetRef`
108 The same as the given, for convenience and symmetry with
109 `Butler.put`.
111 Raises
112 ------
113 TypeError
114 Raised if the butler is read-only.
116 Notes
117 -----
118 Whether this method inserts the given dataset into a ``Registry`` is
119 implementation defined (some `LimitedButler` subclasses do not have a
120 `Registry`), but it always adds the dataset to a `Datastore`, and the
121 given ``ref.id`` and ``ref.run`` are always preserved.
122 """
123 raise NotImplementedError()
125 def get(
126 self,
127 ref: DatasetRef,
128 /,
129 *,
130 parameters: dict[str, Any] | None = None,
131 storageClass: StorageClass | str | None = None,
132 ) -> Any:
133 """Retrieve a stored dataset.
135 Parameters
136 ----------
137 ref: `DatasetRef`
138 A resolved `DatasetRef` directly associated with a dataset.
139 parameters : `dict`
140 Additional StorageClass-defined options to control reading,
141 typically used to efficiently read only a subset of the dataset.
142 storageClass : `StorageClass` or `str`, optional
143 The storage class to be used to override the Python type
144 returned by this method. By default the returned type matches
145 the dataset type definition for this dataset. Specifying a
146 read `StorageClass` can force a different type to be returned.
147 This type must be compatible with the original type.
149 Returns
150 -------
151 obj : `object`
152 The dataset.
154 Raises
155 ------
156 AmbiguousDatasetError
157 Raised if the supplied `DatasetRef` is unresolved.
159 Notes
160 -----
161 In a `LimitedButler` the only allowable way to specify a dataset is
162 to use a resolved `DatasetRef`. Subclasses can support more options.
163 """
164 log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
165 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
167 @deprecated(
168 reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
169 " Please use Butler.get(). Will be removed after v27.0.",
170 version="v26.0",
171 category=FutureWarning,
172 )
173 def getDirect(
174 self,
175 ref: DatasetRef,
176 *,
177 parameters: dict[str, Any] | None = None,
178 storageClass: str | StorageClass | None = None,
179 ) -> Any:
180 """Retrieve a stored dataset.
182 Parameters
183 ----------
184 ref : `DatasetRef`
185 Resolved reference to an already stored dataset.
186 parameters : `dict`
187 Additional StorageClass-defined options to control reading,
188 typically used to efficiently read only a subset of the dataset.
189 storageClass : `StorageClass` or `str`, optional
190 The storage class to be used to override the Python type
191 returned by this method. By default the returned type matches
192 the dataset type definition for this dataset. Specifying a
193 read `StorageClass` can force a different type to be returned.
194 This type must be compatible with the original type.
196 Returns
197 -------
198 obj : `object`
199 The dataset.
200 """
201 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
203 @deprecated(
204 reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
205 "Please use Butler.getDeferred(). Will be removed after v27.0.",
206 version="v26.0",
207 category=FutureWarning,
208 )
209 def getDirectDeferred(
210 self,
211 ref: DatasetRef,
212 *,
213 parameters: dict[str, Any] | None = None,
214 storageClass: str | StorageClass | None = None,
215 ) -> DeferredDatasetHandle:
216 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
217 from a resolved `DatasetRef`.
219 Parameters
220 ----------
221 ref : `DatasetRef`
222 Resolved reference to an already stored dataset.
223 parameters : `dict`
224 Additional StorageClass-defined options to control reading,
225 typically used to efficiently read only a subset of the dataset.
226 storageClass : `StorageClass` or `str`, optional
227 The storage class to be used to override the Python type
228 returned by this method. By default the returned type matches
229 the dataset type definition for this dataset. Specifying a
230 read `StorageClass` can force a different type to be returned.
231 This type must be compatible with the original type.
233 Returns
234 -------
235 obj : `DeferredDatasetHandle`
236 A handle which can be used to retrieve a dataset at a later time.
237 """
238 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
240 def getDeferred(
241 self,
242 ref: DatasetRef,
243 /,
244 *,
245 parameters: dict[str, Any] | None = None,
246 storageClass: str | StorageClass | None = None,
247 ) -> DeferredDatasetHandle:
248 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
249 after an immediate registry lookup.
251 Parameters
252 ----------
253 ref : `DatasetRef`
254 For the default implementation of a `LimitedButler`, the only
255 acceptable parameter is a resolved `DatasetRef`.
256 parameters : `dict`
257 Additional StorageClass-defined options to control reading,
258 typically used to efficiently read only a subset of the dataset.
259 storageClass : `StorageClass` or `str`, optional
260 The storage class to be used to override the Python type
261 returned by this method. By default the returned type matches
262 the dataset type definition for this dataset. Specifying a
263 read `StorageClass` can force a different type to be returned.
264 This type must be compatible with the original type.
266 Returns
267 -------
268 obj : `DeferredDatasetHandle`
269 A handle which can be used to retrieve a dataset at a later time.
271 Notes
272 -----
273 In a `LimitedButler` the only allowable way to specify a dataset is
274 to use a resolved `DatasetRef`. Subclasses can support more options.
275 """
276 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
278 def get_datastore_names(self) -> tuple[str, ...]:
279 """Return the names of the datastores associated with this butler.
281 Returns
282 -------
283 names : `tuple` [`str`, ...]
284 The names of the datastores.
285 """
286 return self._datastore.names
288 def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
289 """Return the defined root URIs for all registered datastores.
291 Returns
292 -------
293 roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
294 A mapping from datastore name to datastore root URI. The root
295 can be `None` if the datastore does not have any concept of a root
296 URI.
297 """
298 return self._datastore.roots
300 def getURIs(
301 self,
302 ref: DatasetRef,
303 /,
304 *,
305 predict: bool = False,
306 ) -> DatasetRefURIs:
307 """Return the URIs associated with the dataset.
309 Parameters
310 ----------
311 ref : `DatasetRef`
312 A `DatasetRef` for which URIs are requested.
313 predict : `bool`
314 If `True`, allow URIs to be returned of datasets that have not
315 been written.
317 Returns
318 -------
319 uris : `DatasetRefURIs`
320 The URI to the primary artifact associated with this dataset (if
321 the dataset was disassembled within the datastore this may be
322 `None`), and the URIs to any components associated with the dataset
323 artifact (can be empty if there are no components).
324 """
325 return self._datastore.getURIs(ref, predict)
327 def getURI(
328 self,
329 ref: DatasetRef,
330 /,
331 *,
332 predict: bool = False,
333 ) -> ResourcePath:
334 """Return the URI to the Dataset.
336 Parameters
337 ----------
338 ref : `DatasetRef`
339 A `DatasetRef` for which a single URI is requested.
340 predict : `bool`
341 If `True`, allow URIs to be returned of datasets that have not
342 been written.
344 Returns
345 -------
346 uri : `lsst.resources.ResourcePath`
347 URI pointing to the Dataset within the datastore. If the
348 Dataset does not exist in the datastore, and if ``predict`` is
349 `True`, the URI will be a prediction and will include a URI
350 fragment "#predicted".
351 If the datastore does not have entities that relate well
352 to the concept of a URI the returned URI string will be
353 descriptive. The returned URI is not guaranteed to be obtainable.
355 Raises
356 ------
357 RuntimeError
358 Raised if a URI is requested for a dataset that consists of
359 multiple artifacts.
360 """
361 primary, components = self.getURIs(ref, predict=predict)
363 if primary is None or components:
364 raise RuntimeError(
365 f"Dataset ({ref}) includes distinct URIs for components. "
366 "Use LimitedButler.getURIs() instead."
367 )
368 return primary
370 def get_many_uris(
371 self,
372 refs: Iterable[DatasetRef],
373 predict: bool = False,
374 allow_missing: bool = False,
375 ) -> dict[DatasetRef, DatasetRefURIs]:
376 """Return URIs associated with many datasets.
378 Parameters
379 ----------
380 refs : iterable of `DatasetIdRef`
381 References to the required datasets.
382 predict : `bool`, optional
383 If `True`, allow URIs to be returned of datasets that have not
384 been written.
385 allow_missing : `bool`
386 If `False`, and ``predict`` is `False`, will raise if a
387 `DatasetRef` does not exist.
389 Returns
390 -------
391 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
392 A dict of primary and component URIs, indexed by the passed-in
393 refs.
395 Raises
396 ------
397 FileNotFoundError
398 A URI has been requested for a dataset that does not exist and
399 guessing is not allowed.
401 Notes
402 -----
403 In file-based datastores, get_many_uris does not check that the file is
404 present. It assumes that if datastore is aware of the file then it
405 actually exists.
406 """
407 return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)
409 def stored(self, ref: DatasetRef) -> bool:
410 """Indicate whether the dataset's artifacts are present in the
411 Datastore.
413 Parameters
414 ----------
415 ref : `DatasetRef`
416 Resolved reference to a dataset.
418 Returns
419 -------
420 stored : `bool`
421 Whether the dataset artifact exists in the datastore and can be
422 retrieved.
423 """
424 return self._datastore.exists(ref)
426 def stored_many(
427 self,
428 refs: Iterable[DatasetRef],
429 ) -> dict[DatasetRef, bool]:
430 """Check the datastore for artifact existence of multiple datasets
431 at once.
433 Parameters
434 ----------
435 refs : iterable of `DatasetRef`
436 The datasets to be checked.
438 Returns
439 -------
440 existence : `dict` of [`DatasetRef`, `bool`]
441 Mapping from given dataset refs to boolean indicating artifact
442 existence.
443 """
444 return self._datastore.mexists(refs)
446 @deprecated(
447 reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
448 "Will be removed after v27.0.",
449 version="v26.0",
450 category=FutureWarning,
451 )
452 def datasetExistsDirect(self, ref: DatasetRef) -> bool:
453 """Return `True` if a dataset is actually present in the Datastore.
455 Parameters
456 ----------
457 ref : `DatasetRef`
458 Resolved reference to a dataset.
460 Returns
461 -------
462 exists : `bool`
463 Whether the dataset exists in the Datastore.
464 """
465 return self.stored(ref)
467 def markInputUnused(self, ref: DatasetRef) -> None:
468 """Indicate that a predicted input was not actually used when
469 processing a `Quantum`.
471 Parameters
472 ----------
473 ref : `DatasetRef`
474 Reference to the unused dataset.
476 Notes
477 -----
478 By default, a dataset is considered "actually used" if it is accessed
479 via `getDirect` or a handle to it is obtained via `getDirectDeferred`
480 (even if the handle is not used). This method must be called after one
481 of those in order to remove the dataset from the actual input list.
483 This method does nothing for butlers that do not store provenance
484 information (which is the default implementation provided by the base
485 class).
486 """
487 pass
489 @abstractmethod
490 def pruneDatasets(
491 self,
492 refs: Iterable[DatasetRef],
493 *,
494 disassociate: bool = True,
495 unstore: bool = False,
496 tags: Iterable[str] = (),
497 purge: bool = False,
498 ) -> None:
499 """Remove one or more datasets from a collection and/or storage.
501 Parameters
502 ----------
503 refs : `~collections.abc.Iterable` of `DatasetRef`
504 Datasets to prune. These must be "resolved" references (not just
505 a `DatasetType` and data ID).
506 disassociate : `bool`, optional
507 Disassociate pruned datasets from ``tags``, or from all collections
508 if ``purge=True``.
509 unstore : `bool`, optional
510 If `True` (`False` is default) remove these datasets from all
511 datastores known to this butler. Note that this will make it
512 impossible to retrieve these datasets even via other collections.
513 Datasets that are already not stored are ignored by this option.
514 tags : `~collections.abc.Iterable` [ `str` ], optional
515 `~CollectionType.TAGGED` collections to disassociate the datasets
516 from. Ignored if ``disassociate`` is `False` or ``purge`` is
517 `True`.
518 purge : `bool`, optional
519 If `True` (`False` is default), completely remove the dataset from
520 the `Registry`. To prevent accidental deletions, ``purge`` may
521 only be `True` if all of the following conditions are met:
523 - ``disassociate`` is `True`;
524 - ``unstore`` is `True`.
526 This mode may remove provenance information from datasets other
527 than those provided, and should be used with extreme care.
529 Raises
530 ------
531 TypeError
532 Raised if the butler is read-only, if no collection was provided,
533 or the conditions for ``purge=True`` were not met.
534 """
535 raise NotImplementedError()
537 @property
538 @abstractmethod
539 def dimensions(self) -> DimensionUniverse:
540 """Structure managing all dimensions recognized by this data
541 repository (`DimensionUniverse`).
542 """
543 raise NotImplementedError()
545 @property
546 @deprecated(
547 reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
548 "relevant functionality. Will be removed after v27.0.",
549 version="v26.0",
550 category=FutureWarning,
551 )
552 def datastore(self) -> Datastore:
553 """The object that manages actual dataset storage. (`Datastore`)"""
554 return self._datastore
    # Backing store used by the default get/stored/URI implementations above;
    # concrete subclasses are expected to assign this.
    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    # Shared registry of known storage class definitions.
    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """