# Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 14%
# 111 statements
# coverage.py v7.2.5, created at 2023-05-04 05:01 -0700
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Module defining a butler-like object specialized to a specific quantum."""

# The docstring must precede the ``__future__`` import; a string literal
# placed after an import is an ordinary expression, not the module docstring.
from __future__ import annotations

__all__ = ("ButlerQuantumContext",)

import warnings
from typing import Any, List, Optional, Sequence, Union

from lsst.daf.butler import (
    Butler,
    DatasetRef,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    UnresolvedRefWarning,
)
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

# Module-level logger shared by everything defined in this file.
_LOG = getLogger(__name__)
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually retrieved and stored. This is in contrast to what the
    preflight expects to be retrieved and stored by looking at the graph
    before execution.

    Do not use constructor directly, instead use `from_full` or `from_limited`
    factory methods.

    Notes
    -----
    `ButlerQuantumContext` instances are backed by either
    `lsst.daf.butler.Butler` or `lsst.daf.butler.LimitedButler`. When a
    limited butler is used then quantum has to contain dataset references
    that are completely resolved (usually when graph is constructed by
    GraphBuilder).

    When instances are backed by full butler, the quantum graph does not have
    to resolve output or intermediate references, but input references of each
    quantum have to be resolved before they can be used by this class. When
    executing such graphs, intermediate references used as input to some
    Quantum are resolved by ``lsst.ctrl.mpexec.SingleQuantumExecutor``. If
    output references of a quantum are resolved, they will be unresolved when
    full butler is used.
    """

    def __init__(self, *, limited: LimitedButler, quantum: Quantum, butler: Butler | None = None):
        """Record the quantum and the butler(s) used to service it.

        Parameters
        ----------
        limited : `lsst.daf.butler.LimitedButler`
            Butler used for all gets, and for puts when ``butler`` is `None`.
        quantum : `lsst.daf.butler.Quantum`
            Quantum whose inputs and outputs define the allowed datasets.
        butler : `lsst.daf.butler.Butler`, optional
            Full butler; when given, it is the one used for puts.
        """
        self.quantum = quantum
        # Membership is tracked by (datasetType, dataId) pairs rather than by
        # the refs themselves, so lookups do not depend on the ref's id.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__full_butler = butler
        self.__butler = limited

    @classmethod
    def from_full(cls, butler: Butler, quantum: Quantum) -> ButlerQuantumContext:
        """Make ButlerQuantumContext backed by `lsst.daf.butler.Butler`.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler object from/to which datasets will be get/put.
        quantum : `lsst.daf.butler.core.Quantum`
            Quantum object that describes the datasets which will be get/put by
            a single execution of this node in the pipeline graph. All input
            dataset references must be resolved in this Quantum. Output
            references can be resolved, but they will be unresolved before
            being put.

        Returns
        -------
        butlerQC : `ButlerQuantumContext`
            Instance of butler wrapper.
        """
        # Use ``cls`` so that subclasses get an instance of their own type.
        return cls(limited=butler, butler=butler, quantum=quantum)

    @classmethod
    def from_limited(cls, butler: LimitedButler, quantum: Quantum) -> ButlerQuantumContext:
        """Make ButlerQuantumContext backed by `lsst.daf.butler.LimitedButler`.

        Parameters
        ----------
        butler : `lsst.daf.butler.LimitedButler`
            Butler object from/to which datasets will be get/put.
        quantum : `lsst.daf.butler.core.Quantum`
            Quantum object that describes the datasets which will be get/put by
            a single execution of this node in the pipeline graph. Both input
            and output dataset references must be resolved in this Quantum.

        Returns
        -------
        butlerQC : `ButlerQuantumContext`
            Instance of butler wrapper.
        """
        # Use ``cls`` so that subclasses get an instance of their own type.
        return cls(limited=butler, quantum=quantum)

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        """Retrieve a single dataset, a deferred handle, or `None`.

        `None` is passed straight through; any other reference is first
        checked against the inputs of the quantum.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler after checking ``ref`` is a quantum output."""
        self._checkMembership(ref, self.allOutputs)
        if self.__full_butler is not None:
            # If reference is resolved we need to unresolve it first.
            # It is possible that we are putting a dataset into a different
            # run than what was originally expected.
            if ref.id is not None:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=UnresolvedRefWarning)
                    ref = ref.unresolved()
            self.__full_butler.put(value, ref)
        else:
            self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[Optional[DatasetRef]],
            List[Optional[DeferredDatasetRef]],
            DatasetRef,
            DeferredDatasetRef,
            None,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding values into the butler.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check that a DatasetRef is part of the quantum being processed.

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if any checked reference is not present in ``inout``.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        return self.__butler.dimensions