Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 12%
97 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-06 10:05 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-06 10:05 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""Module defining a butler like object specialized to a specific quantum."""

# The docstring has to be the first statement in the file to be recorded as
# the module's ``__doc__``; a ``from __future__`` import is still permitted
# directly after it.
from __future__ import annotations
27__all__ = ("ButlerQuantumContext",)
29from typing import Any, List, Optional, Sequence, Union
31from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
32from lsst.utils.introspection import get_full_type_name
33from lsst.utils.logging import PeriodicLogger, getLogger
35from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
36from .struct import Struct
38_LOG = getLogger(__name__)
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.

    Notes
    -----
    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually retrieved and stored. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    def __init__(self, butler: LimitedButler, quantum: Quantum):
        self.quantum = quantum
        # Membership is tracked as (datasetType, dataId) pairs so that
        # _checkMembership can validate every get/put with a set lookup.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            self.allInputs.update((ref.datasetType, ref.dataId) for ref in refs)
        for refs in quantum.outputs.values():
            self.allOutputs.update((ref.datasetType, ref.dataId) for ref in refs)
        self.__butler = butler

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        """Retrieve a single dataset after checking it is a quantum input.

        Parameters
        ----------
        ref : `DeferredDatasetRef`, `~lsst.daf.butler.DatasetRef`, or `None`
            Reference to retrieve. `None` is passed straight through.

        Returns
        -------
        result : `object`
            `None` if ``ref`` is `None`; the result of the butler's
            ``getDeferred`` for a `DeferredDatasetRef`; otherwise the result
            of the butler's ``get``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is a quantum output.

        Parameters
        ----------
        value : `object`
            The object to store.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference the object is stored under.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[Optional[DatasetRef]],
            List[Optional[DeferredDatasetRef]],
            DatasetRef,
            DeferredDatasetRef,
            None,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    # Multiple-valued connection: fetch each ref in order.
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # _get accepts every element type allowed in either list
                # variant (including None), so no per-element narrowing is
                # needed here.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding ``values`` into the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        AttributeError
            Raised if ``values`` is a `Struct` that lacks an attribute named
            by ``dataset``; the attribute lookup has no fallback.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    # Lengths are compared first because zip would otherwise
                    # silently drop the unmatched tail.
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check that one or more DatasetRefs belong to the quantum.

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if the ``(datasetType, dataId)`` pair of any checked
            reference is not in ``inout``.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        return self.__butler.dimensions