Coverage for python/lsst/pipe/base/_quantumContext.py: 19% (144 statements)
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining a butler-like object specialized to a single quantum."""

from __future__ import annotations

__all__ = ("ExecutionResources", "QuantumContext")

import numbers
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any

import astropy.units as u
from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified as a
        byte-compatible `~astropy.units.Quantity`, a plain number, a string
        containing a plain number, or a string representing a quantity. If
        `None`, no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
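
    Examples
    --------
    A minimal, illustrative construction (the values shown here are
    examples, not defaults). A string ``max_mem`` that is not a plain
    number is parsed with `astropy.units.Quantity` and normalized to bytes:

    >>> res = ExecutionResources(num_cores=4, max_mem="2GB")
    >>> mem_bytes = int(res.max_mem.value)  # max_mem is stored in bytes
    >>> import pickle
    >>> res2 = pickle.loads(pickle.dumps(res))  # round-trips via __reduce__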
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task."""

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # Create our own __init__ to allow more flexible input parameters
        # but with a constrained dataclass definition.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")

        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None = None

        if max_mem is None or isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem == "":
            # Some command line tooling can treat no value as empty string.
            pass
        else:
            parsed_mem = None
            try:
                parsed_mem = float(max_mem)
            except ValueError:
                pass
            else:
                mem = parsed_mem * default_mem_units

            if mem is None:
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            mem = mem.to(u.B)

        object.__setattr__(self, "max_mem", mem)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Deep copy returns itself because the class is frozen."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to only store a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kwargs: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float. Cast it to a python int since we
            # do not want fractional bytes. The constructor ensures that this
            # uses units of byte so we do not have to convert.
            kwargs["max_mem"] = int(self.max_mem.value)
        return kwargs

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickling using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())


class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` wraps a standard butler interface and specializes
    it to the context of a given quantum. In practice this means that the
    only gets and puts this class allows are of
    `~lsst.daf.butler.DatasetRef` objects that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually retrieved and stored. This is in contrast to what
    preflight expects to be read and written based on the graph built
    before execution.
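
    Examples
    --------
    An illustrative sketch of typical use inside a task's ``runQuantum``
    method (``butlerQC``, ``inputRefs``, and ``outputRefs`` are supplied
    by the execution framework; ``run`` is the task's own method):

    >>> def runQuantum(self, butlerQC, inputRefs, outputRefs):
    ...     inputs = butlerQC.get(inputRefs)
    ...     outputs = self.run(**inputs)
    ...     butlerQC.put(outputs, outputRefs)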
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: (
            InputQuantizedConnection
            | list[DatasetRef | None]
            | list[DeferredDatasetRef | None]
            | DatasetRef
            | DeferredDatasetRef
            | None
        ),
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset : see description
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is an
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
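
        Examples
        --------
        An illustrative sketch; ``butlerQC`` and ``inputRefs`` are assumed
        to be supplied by the execution framework, and ``calexp`` is a
        hypothetical connection name:

        >>> inputs = butlerQC.get(inputRefs)  # doctest: +SKIP
        >>> calexp = butlerQC.get(inputRefs.calexp)  # doctest: +SKIP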
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list | tuple):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list | tuple):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))  # type: ignore
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`. If ``values.NAME`` is
            `None`, no output is written.
        dataset : `OutputQuantizedConnection` or `list`[`DatasetRef`] \
                or `DatasetRef`
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding values with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
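
        Examples
        --------
        An illustrative sketch; ``butlerQC`` and ``outputRefs`` are assumed
        to be supplied by the execution framework, and ``outputs`` is the
        `Struct` returned by the task's ``run`` method:

        >>> outputs = self.run(**inputs)  # doctest: +SKIP
        >>> butlerQC.put(outputs, outputRefs)  # doctest: +SKIP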
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is an OutputQuantizedConnection; a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                if (valuesAttribute := getattr(values, name, None)) is None:
                    continue
                if isinstance(refs, list | tuple):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be an object to put for every DatasetRef in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list | tuple):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for quanta with many
            `~lsst.daf.butler.DatasetRef` objects.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions