Coverage for python/lsst/daf/butler/_limited_butler.py: 71%
38 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-07-03 01:08 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-07-03 01:08 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Names exported by ``from ... import *``.
__all__ = ("LimitedButler",)

# Standard library.
import logging
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, Iterable, Optional, Union

# Package-local.
from ._deferredDatasetHandle import DeferredDatasetHandle
from .core import (
    AmbiguousDatasetError,
    DatasetRef,
    Datastore,
    DimensionUniverse,
    StorageClassFactory,
)

# Module-level logger named after this module.
log = logging.getLogger(__name__)
class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.

    Concrete subclasses must implement `isWriteable`, `putDirect`,
    `pruneDatasets` and `dimensions`, and must provide the ``datastore`` and
    ``storageClasses`` attributes.  The default `getDirect` and
    `datasetExistsDirect` implementations delegate to ``datastore``.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any:
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`, optional
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        The resolution check is made here, before the datastore is consulted,
        so that the documented exception does not depend on how a particular
        `Datastore` implementation reacts to an unresolved reference.
        """
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return self.datastore.get(ref, parameters=parameters)

    def getDirectDeferred(
        self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`, optional
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # Fail at handle-creation time rather than when the handle is used.
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used).  This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune.  These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler.  Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from.  Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`.  To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

             - ``disassociate`` is `True`;
             - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """