Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 11%
95 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-11 02:40 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-11 02:40 -0800
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Module defining a butler like object specialized to a specific quantum.
25"""
27__all__ = ("ButlerQuantumContext",)
29from typing import Any, List, Optional, Sequence, Union
31from lsst.daf.butler import Butler, DatasetRef, Quantum
32from lsst.utils.introspection import get_full_type_name
33from lsst.utils.logging import PeriodicLogger, getLogger
35from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
36from .struct import Struct
38_LOG = getLogger(__name__)
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph. All input
        dataset references must be resolved (i.e. satisfy
        ``DatasetRef.id is not None``) prior to constructing the
        `ButlerQuantumContext`.

    Notes
    -----
    Most quanta in any non-trivial graph will not start with resolved dataset
    references, because they represent processing steps that can only run
    after some other quanta have produced their inputs. At present, it is the
    responsibility of ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all
    datasets prior to constructing `ButlerQuantumContext` and calling
    `runQuantum`, and the fact that this precondition is satisfied by code in
    a downstream package is considered a problem with the
    ``pipe_base/ctrl_mpexec`` separation of concerns that will be addressed in
    the future.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        # Membership is tracked as (datasetType, dataId) pairs so that
        # ``_checkMembership`` can validate a ref with a single set lookup.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__butler = butler

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        """Fetch a single dataset after checking it is an input of the quantum.

        Parameters
        ----------
        ref : `DeferredDatasetRef`, `~lsst.daf.butler.DatasetRef`, or `None`
            Reference to the dataset to retrieve. `None` is passed through
            and yields `None`.

        Returns
        -------
        return : `object`
            `None` if ``ref`` is `None`; otherwise whatever the butler's
            ``getDirectDeferred`` (for a `DeferredDatasetRef`) or
            ``getDirect`` (for a plain ref) returns.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the inputs of the quantum.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is an output of the
        quantum.

        Parameters
        ----------
        value : `object`
            The object to hand to the butler's ``put``.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the outputs of the quantum.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[Optional[DatasetRef]],
            List[Optional[DeferredDatasetRef]],
            DatasetRef,
            DeferredDatasetRef,
            None,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset, start=1):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref, start=1):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j,
                            n_refs,
                            name,
                            i,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset, start=1):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding values into the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                # A missing attribute on ``values`` surfaces as the
                # AttributeError raised by ``getattr`` itself.
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    # Lengths were verified equal above, so zip pairs every
                    # value with its reference.
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            # Report the offending type, matching the error raised by ``get``.
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to put"
            )

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check that one or more DatasetRefs are part of the quantum.

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if the ``(datasetType, dataId)`` pair of any checked
            reference is not contained in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")