Coverage for python/lsst/pipe/base/_quantumContext.py: 21%
147 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-23 08:14 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-23 08:14 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining variants for valid values used to constrain datasets in a
23graph building query.
24"""
26from __future__ import annotations
28__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext")
30import numbers
31from collections.abc import Callable, Sequence
32from dataclasses import dataclass
33from typing import Any
35import astropy.units as u
36from deprecated.sphinx import deprecated
37from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
38from lsst.utils.introspection import get_full_type_name
39from lsst.utils.logging import PeriodicLogger, getLogger
41from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
42from .struct import Struct
44_LOG = getLogger(__name__)
@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
        optional
        The amount of memory allocated to the task. Can be specified
        as byte-compatible `~astropy.units.Quantity`, a plain number,
        a string with a plain number, or a string representing a quantity.
        If `None` no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task.
    """

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # The dataclass is declared frozen with init=False so that we can
        # accept flexible memory specifications here while keeping the
        # stored attributes constrained; frozen instances must be assigned
        # via object.__setattr__.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")
        object.__setattr__(self, "num_cores", num_cores)
        object.__setattr__(self, "max_mem", self._normalize_mem(max_mem, default_mem_units))

    @staticmethod
    def _normalize_mem(
        max_mem: u.Quantity | numbers.Real | str | None, default_mem_units: u.Unit
    ) -> u.Quantity | None:
        """Convert a flexible memory specification to a byte Quantity.

        Parameters
        ----------
        max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`
            Memory specification as accepted by the constructor.
        default_mem_units : `astropy.units.Unit`
            Unit assumed when ``max_mem`` is a plain number.

        Returns
        -------
        mem : `~astropy.units.Quantity` or `None`
            The memory limit converted to bytes, or `None` if no limit
            was given.
        """
        if max_mem is None or isinstance(max_mem, u.Quantity):
            memory = max_mem
        elif max_mem == "":
            # Some command line tooling can treat no value as empty string.
            memory = None
        else:
            try:
                memory = float(max_mem) * default_mem_units
            except ValueError:
                # Not a plain number, so let astropy parse it as a full
                # quantity string (e.g. "2 GB").
                memory = u.Quantity(max_mem)
        if memory is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            memory = memory.to(u.B)
        return memory

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Deep copy returns itself because the class is frozen."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to only store a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        state: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float. Cast it to a python int since we
            # do not want fractional bytes. The constructor ensures that this
            # uses units of byte so we do not have to convert.
            state["max_mem"] = int(self.max_mem.value)
        return state

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return (
            self._unpickle_via_factory,
            (self.__class__, [], self._reduce_kwargs()),
        )
class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        # Cache the (datasetType, dataId) pairs declared by the quantum so
        # that membership checks in _get/_put are cheap set lookups.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Get a single dataset from the butler after verifying it belongs
        to this quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler"""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, (list, tuple)):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, (list, tuple)):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, (DatasetRef, DeferredDatasetRef)) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding values into the butler for each dataset.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, (list, tuple)):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, (list, tuple)):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        if not isinstance(ref, (list, tuple)):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions
@deprecated(
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v27.",
    version="v26",
    category=FutureWarning,
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated version of `QuantumContext`."""