Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 12%
92 statements
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:42 -0700
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:42 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Module defining a butler-like object specialized to a specific quantum."""

# The docstring must be the first statement in the module to be picked up as
# ``__doc__``; a ``from __future__`` import may legally follow it.
from __future__ import annotations
27__all__ = ("ButlerQuantumContext",)
29from typing import Any, List, Sequence, Union
31from lsst.daf.butler import Butler, DatasetRef, Quantum
32from lsst.utils.introspection import get_full_type_name
33from lsst.utils.logging import PeriodicLogger, getLogger
35from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
36from .struct import Struct
38_LOG = getLogger(__name__)
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.  All input
        dataset references must be resolved (i.e. satisfy
        ``DatasetRef.id is not None``) prior to constructing the
        `ButlerQuantumContext`.

    Notes
    -----
    Most quanta in any non-trivial graph will not start with resolved dataset
    references, because they represent processing steps that can only run
    after some other quanta have produced their inputs.  At present, it is the
    responsibility of ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all
    datasets prior to constructing `ButlerQuantumContext` and calling
    `runQuantum`, and the fact that this precondition is satisfied by code in
    a downstream package is considered a problem with the
    ``pipe_base/ctrl_mpexec`` separation of concerns that will be addressed in
    the future.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        # Membership is tracked as (datasetType, dataId) pairs so that the
        # check in _checkMembership is a cheap set lookup.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: Union[DeferredDatasetRef, DatasetRef]) -> Any:
        """Retrieve a single dataset after checking it is a quantum input.

        Parameters
        ----------
        ref : `DeferredDatasetRef` or `~lsst.daf.butler.DatasetRef`
            Reference to the dataset to retrieve.

        Returns
        -------
        value : `object`
            Result of ``Butler.getDirectDeferred`` if ``ref`` is a
            `DeferredDatasetRef`, otherwise the result of
            ``Butler.getDirect``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is a quantum output.

        Parameters
        ----------
        value : `object`
            Object to store.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[DatasetRef],
            List[DeferredDatasetRef],
            DatasetRef,
            DeferredDatasetRef,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`. The
            function will get and return the corresponding datasets from the
            butler.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset, start=1):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref, start=1):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j,
                            n_refs,
                            name,
                            i,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset, start=1):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))  # type: ignore
                periodic.log("Retrieved %d out of %d datasets", i, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding values to the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    # Lengths were verified above, so zip pairs every ref
                    # with exactly one value.
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to put"
            )

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check that a DatasetRef is part of the quantum being processed.

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if any of the given references is not in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")