Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 13%
98 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-16 09:02 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-16 09:02 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Module defining a butler like object specialized to a specific quantum.
25"""
27__all__ = ("ButlerQuantumContext",)
29from collections.abc import Sequence
30from typing import Any
32from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
33from lsst.utils.introspection import get_full_type_name
34from lsst.utils.logging import PeriodicLogger, getLogger
36from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
37from .struct import Struct
39_LOG = getLogger(__name__)
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.

    Notes
    -----
    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    def __init__(self, butler: LimitedButler, quantum: Quantum):
        self.quantum = quantum
        # Membership checks are keyed on (datasetType, dataId) pairs; the
        # sets are built once here so each later get/put is a single lookup.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Retrieve a single dataset after checking it is a quantum input.

        Parameters
        ----------
        ref : `DeferredDatasetRef`, `~lsst.daf.butler.DatasetRef` or `None`
            Reference to fetch. `None` is passed straight through.

        Returns
        -------
        return : `object`
            `None` if ``ref`` is `None`; the result of the butler's
            ``getDeferred`` for a `DeferredDatasetRef`; otherwise the result
            of the butler's ``get``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is a quantum output.

        Parameters
        ----------
        value : `object`
            Object handed to the butler's ``put``.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`, or a
            single `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`.
            The function will get and return the corresponding datasets from
            the butler. If `None` is passed in place of a reference then the
            corresponding returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` then the return type will be a list of objects. If the
            input argument is a single reference then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    # Multiple connection: fetch every reference in order.
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal

        if isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved

        if dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)

        raise TypeError(
            f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
        )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding ``values`` into the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                # No default is given to getattr, so an attribute missing
                # from ``values`` propagates whatever getattr raises.
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    # Lengths were verified above, so pairing is one-to-one.
                    for ref, value in zip(refs, valuesAttribute):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for ref, value in zip(dataset, values):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `ButlerQuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.

        Raises
        ------
        ValueError
            Raised if any checked reference's ``(datasetType, dataId)`` pair
            is not present in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions