Coverage for python/lsst/pipe/base/_quantumContext.py: 20%
147 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-25 09:14 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-25 09:14 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Module defining a butler like object specialized to a specific quantum.
25"""
27__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext")
29import numbers
30from collections.abc import Callable, Sequence
31from dataclasses import dataclass
32from typing import Any
34import astropy.units as u
35from deprecated.sphinx import deprecated
36from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
37from lsst.utils.introspection import get_full_type_name
38from lsst.utils.logging import PeriodicLogger, getLogger
40from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
41from .struct import Struct
# Module-level logger; also drives the PeriodicLogger used to report
# progress during long dataset retrievals in QuantumContext.get.
_LOG = getLogger(__name__)
@dataclass(init=False, frozen=True)
class ExecutionResources:
    """Resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        Number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        Memory allocated to the task. May be given as a byte-compatible
        `~astropy.units.Quantity`, a plain number, a string containing a
        plain number, or a string describing a quantity with units.
        `None` means that no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        Unit assumed when ``max_mem`` is supplied as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task.
    """

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # A hand-written __init__ accepts the flexible ``max_mem`` forms
        # while the frozen dataclass definition stays narrowly typed.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")
        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None
        if max_mem is None or isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem == "":
            # Some command line tooling represents "no value" as an
            # empty string; treat it the same as None.
            mem = None
        else:
            try:
                mem = float(max_mem) * default_mem_units
            except ValueError:
                # Not a bare number, so let astropy parse it as a
                # quantity with explicit units (e.g. "4GB").
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Normalize to bytes; this also verifies the value is
            # convertible to bytes at all.
            mem = mem.to(u.B)

        object.__setattr__(self, "max_mem", mem)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Return ``self``; a frozen instance never needs copying."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments to be used by `__reduce__`.

        Needed because the dataclass is keyword-only and we want the
        pickle payload to carry a plain integer byte count rather than a
        full `~astropy.units.Quantity`.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kw: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float; cast to a python int since
            # fractional bytes make no sense. The constructor already
            # guarantees byte units, so no conversion is required here.
            kw["max_mem"] = int(self.max_mem.value)
        return kw

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle by calling a factory.

        Lets `__reduce__`-based unpickling forward keyword arguments as
        well as positional ones.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickle via the keyword-aware factory."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())
class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    # Resources (cores, memory) granted to the executing quantum.
    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        # Pre-compute the set of (datasetType, dataId) pairs declared by
        # the quantum so membership checks in get/put are cheap lookups.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Retrieve a single dataset (or deferred handle) from the
        butler after verifying the ref is one of this quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            # Deferred refs wrap the real DatasetRef; check membership on
            # the wrapped ref and hand back a deferred-load handle.
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            # A missing optional input maps to None in the output.
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store ``value`` in the butler after verifying ``ref`` is one
        of this quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            # Whole-connection get: build a dict keyed by connection name.
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef) or isinstance(dataset, DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding values with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            # Whole-connection put: values must be a Struct whose
            # attributes mirror the connection names.
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions
@deprecated(
    version="v26",
    category=FutureWarning,
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v27.",
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated alias of `QuantumContext`, kept only for backward
    compatibility.
    """