Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 12%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

70 statements  

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Module defining a butler like object specialized to a specific quantum."""

# The module docstring must be the first statement in the file (only comments
# may precede it); a string placed after the ``from __future__`` import is a
# discarded expression and leaves the module's ``__doc__`` unset.
from __future__ import annotations

__all__ = ("ButlerQuantumContext",)

28 

29from typing import Any, List, Sequence, Union 

30 

31from lsst.daf.butler import Butler, DatasetRef, Quantum 

32 

33from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

34from .struct import Struct 

35 

36 

class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph. All input
        dataset references must be resolved (i.e. satisfy
        ``DatasetRef.id is not None``) prior to constructing the
        `ButlerQuantumContext`.

    Notes
    -----
    Most quanta in any non-trivial graph will not start with resolved dataset
    references, because they represent processing steps that can only run
    after some other quanta have produced their inputs. At present, it is the
    responsibility of ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all
    datasets prior to constructing `ButlerQuantumContext` and calling
    `runQuantum`, and the fact that this precondition is satisfied by code in
    a downstream package is considered a problem with the
    ``pipe_base/ctrl_mpexec`` separation of concerns that will be addressed in
    the future.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        # Membership checks are keyed on ``(datasetType, dataId)`` pairs, so
        # build the lookup sets once here rather than on every get/put.
        self.allInputs = {(ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs}
        self.allOutputs = {(ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs}
        self.__butler = butler

    def _get(self, ref: Union[DeferredDatasetRef, DatasetRef]) -> Any:
        """Fetch a single dataset after checking it is an input of the
        quantum.

        Parameters
        ----------
        ref : `DeferredDatasetRef` or `~lsst.daf.butler.DatasetRef`
            Reference to the dataset to fetch. A `DeferredDatasetRef` is
            fetched with ``getDirectDeferred``; anything else is fetched
            with ``getDirect``.

        Returns
        -------
        return : `object`
            Whatever the underlying butler call returns for ``ref``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)

        self._checkMembership(ref, self.allInputs)
        return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is an output of the
        quantum.

        Parameters
        ----------
        value : `object`
            The object to store.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference describing the dataset being stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[DatasetRef],
            List[DeferredDatasetRef],
            DatasetRef,
            DeferredDatasetRef,
        ],
    ) -> Any:
        """Fetches data from the butler

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef` or `DeferredDatasetRef`. The
            function will get and return the corresponding datasets from the
            butler.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            for name, ref in dataset:
                # A connection attribute holds either one ref or a list of
                # refs; mirror that shape in the returned value.
                if isinstance(ref, list):
                    retVal[name] = [self._get(r) for r in ref]
                else:
                    retVal[name] = self._get(ref)
            return retVal
        if isinstance(dataset, list):
            return [self._get(x) for x in dataset]
        if isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        raise TypeError("Dataset argument is not a type that can be used to get")

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Puts data into the butler

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding ``values`` into the butler.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        AttributeError
            Raised if ``dataset`` is an `OutputQuantizedConnection` and
            ``values`` has no attribute with the name of one of its
            connections.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    # Lengths are compared first so that zip below cannot
                    # silently drop unmatched refs or values.
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Internal function used to check if a DatasetRef is part of the input
        quantum

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if the ``(datasetType, dataId)`` pair of any checked
            reference is not present in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")