Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Module defining a butler like object specialized to a specific quantum.
"""

# The module docstring must be the first statement in the file to be bound to
# ``__doc__``; a ``from __future__`` import is still permitted directly after
# it.
from __future__ import annotations

__all__ = ("ButlerQuantumContext",)

from typing import Any, List, Sequence, Union

from lsst.daf.butler import Butler, DatasetRef, Quantum

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct
class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph. All input
        dataset references must be resolved (i.e. satisfy
        ``DatasetRef.id is not None``) prior to constructing the
        `ButlerQuantumContext`.

    Notes
    -----
    Most quanta in any non-trivial graph will not start with resolved dataset
    references, because they represent processing steps that can only run
    after some other quanta have produced their inputs. At present, it is the
    responsibility of ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all
    datasets prior to constructing `ButlerQuantumContext` and calling
    `runQuantum`, and the fact that this precondition is satisfied by code in
    a downstream package is considered a problem with the
    ``pipe_base/ctrl_mpexec`` separation of concerns that will be addressed in
    the future.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        # Membership is tracked as ``(datasetType, dataId)`` pairs so that
        # `_checkMembership` can validate a ref with a single set lookup.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        # Double underscore triggers name mangling, keeping the unrestricted
        # butler off the public attribute surface of this class.
        self.__butler = butler

    def _get(self, ref: Union[DeferredDatasetRef, DatasetRef]) -> Any:
        """Fetch a single dataset after checking it is an input of the quantum.

        Parameters
        ----------
        ref : `DeferredDatasetRef` or `~lsst.daf.butler.DatasetRef`
            Reference to fetch. A `DeferredDatasetRef` is unwrapped via its
            ``datasetRef`` attribute and fetched with the butler's
            ``getDirectDeferred``; anything else is fetched with
            ``getDirect``.

        Returns
        -------
        return : `object`
            Whatever the underlying butler call returns.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is an output of the
        quantum.

        Parameters
        ----------
        value : `object`
            Object handed to the butler's ``put``.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[DatasetRef],
            List[DeferredDatasetRef],
            DatasetRef,
            DeferredDatasetRef,
        ],
    ) -> Any:
        """Fetches data from the butler

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`), or a
            single `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`).
            The function will get and return the corresponding datasets from
            the butler.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        """
        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            # Iterating the connection yields (attribute name, ref-or-list).
            for name, ref in dataset:
                if isinstance(ref, list):
                    retVal[name] = [self._get(r) for r in ref]
                else:
                    retVal[name] = self._get(ref)
            return retVal
        if isinstance(dataset, list):
            return [self._get(x) for x in dataset]
        if isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        raise TypeError("Dataset argument is not a type that can be used to get")

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Puts data into the butler

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection`
            (iterated as ``(name, refs)`` pairs), a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding ``values`` into the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported argument types.
        AttributeError
            Raised if ``dataset`` is an `OutputQuantizedConnection` and
            ``values`` lacks an attribute for one of its connection names.

        Notes
        -----
        Puts are issued one at a time, in order; if a later reference fails
        validation, earlier puts made by the same call are not undone here.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    # Lengths are compared up front because zip would
                    # silently truncate to the shorter of the two.
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Internal function used to check if a DatasetRef is part of the input
        quantum

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if the ``(datasetType, dataId)`` pair of any checked
            reference is not contained in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")