Coverage for python/lsst/pipe/base/_quantumContext.py: 21% (147 statements)
coverage.py v7.3.2, created at 2023-11-30 12:09 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Module defining variants for valid values used to constrain datasets in a
29graph building query.
30"""

from __future__ import annotations

__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext")

import numbers
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any

import astropy.units as u
from deprecated.sphinx import deprecated
from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified
        as a byte-compatible `~astropy.units.Quantity`, a plain number,
        a string containing a plain number, or a string representing a
        quantity. If `None`, no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task."""

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # Create our own __init__ to allow more flexible input parameters
        # but with a constrained dataclass definition.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")

        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None = None

        if max_mem is None or isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem == "":
            # Some command-line tooling treats no value as an empty string.
            pass
        else:
            parsed_mem = None
            try:
                parsed_mem = float(max_mem)
            except ValueError:
                pass
            else:
                mem = parsed_mem * default_mem_units

            if mem is None:
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            mem = mem.to(u.B)

        object.__setattr__(self, "max_mem", mem)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Deep copy returns itself because the class is frozen."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to store only a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kwargs: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float. Cast it to a python int since we
            # do not want fractional bytes. The constructor ensures that this
            # uses units of byte so we do not have to convert.
            kwargs["max_mem"] = int(self.max_mem.value)
        return kwargs

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickling via `__reduce__` with keyword arguments as well
        as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())
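

# A minimal usage sketch (an illustration, not part of the original module):
# the constructor accepts several spellings of ``max_mem`` and normalizes all
# of them to bytes, while the custom ``__reduce__`` keeps pickles small by
# storing a plain integer byte count rather than a Quantity.
def _demo_execution_resources() -> None:
    import copy
    import pickle

    res_string = ExecutionResources(num_cores=4, max_mem="2GB")
    res_number = ExecutionResources(max_mem=512)  # uses default_mem_units=u.B
    res_quantity = ExecutionResources(max_mem=2 * u.MB)

    # Every accepted form is normalized to bytes by the constructor.
    assert res_string.max_mem.unit == u.B
    assert res_number.max_mem.value == 512
    assert res_quantity.max_mem.value == 2_000_000

    # The frozen dataclass is safe to share: deepcopy returns the same
    # instance, and a pickle round trip reconstructs an equal object.
    assert copy.deepcopy(res_string) is res_string
    assert pickle.loads(pickle.dumps(res_string)) == res_string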


class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` wraps a standard butler interface and specializes it
    to the context of a given quantum. In practice this means that the only
    gets and puts this class allows are of `~lsst.daf.butler.DatasetRef`
    objects that are contained in the quantum.

    In the future this class will also be used to record provenance on what
    was actually retrieved and stored, in contrast to what preflight expects
    will be retrieved and stored based on inspecting the graph before
    execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in the butler."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is an
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list | tuple):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list | tuple):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding values into the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is an OutputQuantizedConnection; a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list | tuple):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be an object to put for every dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list | tuple):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for quanta with many
            `~lsst.daf.butler.DatasetRef` objects.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions
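

# A minimal sketch (an illustration, not part of the original module) of how
# a concrete PipelineTask typically drives QuantumContext from its
# ``runQuantum`` method; ``task.run`` and its signature are hypothetical
# stand-ins for real science code.
def _demo_run_quantum(
    task: Any,
    butlerQC: QuantumContext,
    inputRefs: InputQuantizedConnection,
    outputRefs: OutputQuantizedConnection,
) -> None:
    # ``get`` with an InputQuantizedConnection returns a dict keyed by
    # connection name; a ref not listed in the quantum raises ValueError.
    inputs = butlerQC.get(inputRefs)
    # Tasks may consult the allocated resources to tune their execution.
    num_cores = butlerQC.resources.num_cores
    # Hypothetical science code producing a Struct of outputs.
    outputs = task.run(**inputs, num_cores=num_cores)
    # ``put`` with an OutputQuantizedConnection expects a Struct whose
    # attribute names mirror the output connection names.
    butlerQC.put(outputs, outputRefs)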


# TODO: remove on DM-40063.
@deprecated(
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v26.",
    version="v26",
    category=FutureWarning,
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated version of `QuantumContext`."""

    pass