# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Module defining variants for valid values used to constrain datasets in a
29graph building query.
30"""

from __future__ import annotations

__all__ = ("ExecutionResources", "QuantumContext")

import numbers
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any

import astropy.units as u
from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified
        as a byte-compatible `~astropy.units.Quantity`, a plain number,
        a string with a plain number, or a string representing a quantity.
        If `None`, no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
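
    Examples
    --------
    A minimal illustration with made-up values (a string is parsed either as
    a plain number in ``default_mem_units`` or as a full quantity, and the
    result is always normalized to bytes):

    >>> res = ExecutionResources(num_cores=4, max_mem="2 GB")
    >>> int(res.max_mem.to_value(u.B))
    2000000000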
69 """
71 num_cores: int = 1
72 """The maximum number of cores that the task can use."""
74 max_mem: u.Quantity | None = None
75 """If defined, the amount of memory allocated to the task.
76 """
78 def __init__(
79 self,
80 *,
81 num_cores: int = 1,
82 max_mem: u.Quantity | numbers.Real | str | None = None,
83 default_mem_units: u.Unit = u.B,
84 ):
85 # Create our own __init__ to allow more flexible input parameters
86 # but with a constrained dataclass definition.
87 if num_cores < 1:
88 raise ValueError("The number of cores must be a positive integer")
90 object.__setattr__(self, "num_cores", num_cores)
92 mem: u.Quantity | None = None
94 if max_mem is None or isinstance(max_mem, u.Quantity):
95 mem = max_mem
96 elif max_mem == "":
97 # Some command line tooling can treat no value as empty string.
98 pass
99 else:
100 parsed_mem = None
101 try:
102 parsed_mem = float(max_mem)
103 except ValueError:
104 pass
105 else:
106 mem = parsed_mem * default_mem_units
108 if mem is None:
109 mem = u.Quantity(max_mem)
111 if mem is not None:
112 # Force to bytes. This also checks that we can convert to bytes.
113 mem = mem.to(u.B)
115 object.__setattr__(self, "max_mem", mem)
117 def __deepcopy__(self, memo: Any) -> ExecutionResources:
118 """Deep copy returns itself because the class is frozen."""
119 return self
121 def _reduce_kwargs(self) -> dict[str, Any]:
122 """Return a dict of the keyword arguments that should be used
123 by `__reduce__`.
125 This is necessary because the dataclass is defined to be keyword
126 only and we wish the default pickling to only store a plain number
127 for the memory allocation and not a large Quantity.
129 Returns
130 -------
131 kwargs : `dict`
132 Keyword arguments to be used when pickling.
133 """
134 kwargs: dict[str, Any] = {"num_cores": self.num_cores}
135 if self.max_mem is not None:
136 # .value is a numpy float. Cast it to a python int since we
137 # do not want fractional bytes. The constructor ensures that this
138 # uses units of byte so we do not have to convert.
139 kwargs["max_mem"] = int(self.max_mem.value)
140 return kwargs
142 @staticmethod
143 def _unpickle_via_factory(
144 cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
145 ) -> ExecutionResources:
146 """Unpickle something by calling a factory.
148 Allows unpickle using `__reduce__` with keyword
149 arguments as well as positional arguments.
150 """
151 return cls(**kwargs)
153 def __reduce__(
154 self,
155 ) -> tuple[
156 Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
157 tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
158 ]:
159 """Pickler."""
160 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())
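

# A usage sketch, not part of the original module: it exercises the pickle
# path (``__reduce__`` stores ``max_mem`` as a plain integer byte count) and
# the frozen-instance deepcopy behaviour. The function name is illustrative.
def _example_execution_resources_roundtrip() -> None:
    import copy
    import pickle

    res = ExecutionResources(num_cores=8, max_mem="512 MB")
    # The class is frozen, so deepcopy simply returns the same instance.
    assert copy.deepcopy(res) is res
    # Pickling stores the memory limit as an integer number of bytes and
    # reconstructs an equivalent frozen instance.
    restored = pickle.loads(pickle.dumps(res))
    assert restored.num_cores == 8
    assert restored.max_mem == res.max_mem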


class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. In practice this
    means that the only gets and puts this class allows are of
    `~lsst.daf.butler.DatasetRef` objects that are contained in the quantum.

    In the future this class will also be used to record provenance for
    what was actually read and written, in contrast to what preflight
    expects to be read and written based on the graph before execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset : `InputQuantizedConnection` or `list` [`DatasetRef`] \
                or `~lsst.daf.butler.DatasetRef` or `None`
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is an
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
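
        Examples
        --------
        Typical call patterns (``qc`` is a `QuantumContext`; the refs are
        illustrative)::

            inputs = qc.get(inputRefs)     # dict keyed by connection name
            values = qc.get([ref1, ref2])  # list of objects
            value = qc.get(ref)            # single object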
269 """
270 # Set up a periodic logger so log messages can be issued if things
271 # are taking too long.
272 periodic = PeriodicLogger(_LOG)
274 if isinstance(dataset, InputQuantizedConnection):
275 retVal = {}
276 n_connections = len(dataset)
277 n_retrieved = 0
278 for i, (name, ref) in enumerate(dataset):
279 if isinstance(ref, list | tuple):
280 val = []
281 n_refs = len(ref)
282 for j, r in enumerate(ref):
283 val.append(self._get(r))
284 n_retrieved += 1
285 periodic.log(
286 "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
287 j + 1,
288 n_refs,
289 name,
290 i + 1,
291 n_connections,
292 )
293 else:
294 val = self._get(ref)
295 periodic.log(
296 "Retrieved dataset for connection '%s' (%d out of %d)",
297 name,
298 i + 1,
299 n_connections,
300 )
301 n_retrieved += 1
302 retVal[name] = val
303 if periodic.num_issued > 0:
304 # This took long enough that we issued some periodic log
305 # messages, so issue a final confirmation message as well.
306 _LOG.verbose(
307 "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
308 )
309 return retVal
310 elif isinstance(dataset, list | tuple):
311 n_datasets = len(dataset)
312 retrieved = []
313 for i, x in enumerate(dataset):
314 # Mypy is not sure of the type of x because of the union
315 # of lists so complains. Ignoring it is more efficient
316 # than adding an isinstance assert.
317 retrieved.append(self._get(x))
318 periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
319 if periodic.num_issued > 0:
320 _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
321 return retrieved
322 elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None:
323 return self._get(dataset)
324 else:
325 raise TypeError(
326 f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
327 )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset : `OutputQuantizedConnection` or `list` [`DatasetRef`] \
                or `DatasetRef`
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding values with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
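
        Examples
        --------
        Typical call patterns (``qc`` is a `QuantumContext`; the values and
        refs are illustrative)::

            qc.put(results, outputRefs)  # Struct + OutputQuantizedConnection
            qc.put([obj1, obj2], [ref1, ref2])
            qc.put(obj, ref)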
364 """
365 if isinstance(dataset, OutputQuantizedConnection):
366 if not isinstance(values, Struct):
367 raise ValueError(
368 "dataset is a OutputQuantizedConnection, a Struct with corresponding"
369 " attributes must be passed as the values to put"
370 )
371 for name, refs in dataset:
372 valuesAttribute = getattr(values, name)
373 if isinstance(refs, list | tuple):
374 if len(refs) != len(valuesAttribute):
375 raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
376 for i, ref in enumerate(refs):
377 self._put(valuesAttribute[i], ref)
378 else:
379 self._put(valuesAttribute, refs)
380 elif isinstance(dataset, list | tuple):
381 if not isinstance(values, Sequence):
382 raise ValueError("Values to put must be a sequence")
383 if len(dataset) != len(values):
384 raise ValueError("There must be a common number of references and values to put")
385 for i, ref in enumerate(dataset):
386 self._put(values[i], ref)
387 elif isinstance(dataset, DatasetRef):
388 self._put(values, dataset)
389 else:
390 raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for quanta with lots of
            `~lsst.daf.butler.DatasetRef` objects.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions
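

# A usage sketch, not part of the original module: how a caller holding a
# butler, a quantum, and a couple of refs from that quantum might drive a
# QuantumContext. All parameter names here are illustrative.
def _example_quantum_context(
    butler: LimitedButler, quantum: Quantum, ref_in: DatasetRef, ref_out: DatasetRef, value: Any
) -> None:
    qc = QuantumContext(butler, quantum, resources=ExecutionResources(num_cores=2))
    # A single ref returns a single object; a list returns a list, with any
    # ``None`` entries mapped to ``None`` in the result.
    data = qc.get(ref_in)
    first, nothing = qc.get([ref_in, None])
    assert nothing is None
    # Puts are only allowed for refs listed among the quantum's outputs;
    # anything else raises ValueError.
    qc.put(value, ref_out)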