Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 13%

93 statements  

coverage.py v6.4.1, created at 2022-06-18 02:36 -0700

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Module defining a butler-like object specialized to a specific quantum.
"""

__all__ = ("ButlerQuantumContext",)

from typing import Any, List, Sequence, Union

from lsst.daf.butler import Butler, DatasetRef, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


class ButlerQuantumContext:
42 """A Butler-like class specialized for a single quantum 

43 

44 A ButlerQuantumContext class wraps a standard butler interface and 

45 specializes it to the context of a given quantum. What this means 

46 in practice is that the only gets and puts that this class allows 

47 are DatasetRefs that are contained in the quantum. 

48 

49 In the future this class will also be used to record provenance on 

50 what was actually get and put. This is in contrast to what the 

51 preflight expects to be get and put by looking at the graph before 

52 execution. 

53 

54 Parameters 

55 ---------- 

56 butler : `lsst.daf.butler.Butler` 

57 Butler object from/to which datasets will be get/put 

58 quantum : `lsst.daf.butler.core.Quantum` 

59 Quantum object that describes the datasets which will be get/put by a 

60 single execution of this node in the pipeline graph. All input 

61 dataset references must be resolved (i.e. satisfy 

62 ``DatasetRef.id is not None``) prior to constructing the 

63 `ButlerQuantumContext`. 

64 

65 Notes 

66 ----- 

67 Most quanta in any non-trivial graph will not start with resolved dataset 

68 references, because they represent processing steps that can only run 

69 after some other quanta have produced their inputs. At present, it is the 

70 responsibility of ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all 

71 datasets prior to constructing `ButlerQuantumContext` and calling 

72 `runQuantum`, and the fact that this precondition is satisfied by code in 

73 a downstream package is considered a problem with the 

74 ``pipe_base/ctrl_mpexec`` separation of concerns that will be addressed in 

75 the future. 
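
    Examples
    --------
    A minimal sketch of the intended call pattern, as seen from a
    `PipelineTask.runQuantum` implementation (the task's ``run`` method and
    the connection containers shown here are supplied by the execution
    framework, not constructed by hand)::

        def runQuantum(self, butlerQC, inputRefs, outputRefs):
            # Only refs contained in the wrapped quantum may be read.
            inputs = butlerQC.get(inputRefs)
            outputs = self.run(**inputs)
            # Likewise, only refs contained in the quantum may be written.
            butlerQC.put(outputs, outputRefs)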

76 """ 

77 

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        # Record every (datasetType, dataId) pair declared by the quantum so
        # that gets and puts can be restricted to them.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: Union[DeferredDatasetRef, DatasetRef]) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[DatasetRef],
            List[DeferredDatasetRef],
            DatasetRef,
            DeferredDatasetRef,
        ],
    ) -> Any:
116 """Fetches data from the butler 

117 

118 Parameters 

119 ---------- 

120 dataset 

121 This argument may either be an `InputQuantizedConnection` which 

122 describes all the inputs of a quantum, a list of 

123 `~lsst.daf.butler.DatasetRef`, or a single 

124 `~lsst.daf.butler.DatasetRef`. The function will get and return 

125 the corresponding datasets from the butler. 

126 

127 Returns 

128 ------- 

129 return : `object` 

130 This function returns arbitrary objects fetched from the bulter. 

131 The structure these objects are returned in depends on the type of 

132 the input argument. If the input dataset argument is a 

133 `InputQuantizedConnection`, then the return type will be a 

134 dictionary with keys corresponding to the attributes of the 

135 `InputQuantizedConnection` (which in turn are the attribute 

136 identifiers of the connections). If the input argument is of type 

137 `list` of `~lsst.daf.butler.DatasetRef` then the return type will 

138 be a list of objects. If the input argument is a single 

139 `~lsst.daf.butler.DatasetRef` then a single object will be 

140 returned. 

141 

142 Raises 

143 ------ 

144 ValueError 

145 Raised if a `DatasetRef` is passed to get that is not defined in 

146 the quantum object 
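
        Examples
        --------
        An illustrative sketch; the connection name ``exposures`` is a
        placeholder for whatever the task's connections class defines::

            # Retrieve everything described by the quantum at once; the
            # result is a dict keyed by connection name.
            inputs = butlerQC.get(inputRefs)

            # Or retrieve the datasets for a single connection, which may be
            # a list of objects or a single object.
            exposures = butlerQC.get(inputRefs.exposures)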

147 """ 

148 # Set up a periodic logger so log messages can be issued if things 

149 # are taking too long. 

150 periodic = PeriodicLogger(_LOG) 

151 

152 if isinstance(dataset, InputQuantizedConnection): 

153 retVal = {} 

154 n_connections = len(dataset) 

155 n_retrieved = 0 

156 for i, (name, ref) in enumerate(dataset): 

157 if isinstance(ref, list): 

158 val = [] 

159 n_refs = len(ref) 

160 for j, r in enumerate(ref): 

161 val.append(self._get(r)) 

162 n_retrieved += 1 

163 periodic.log( 

164 "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)", 

165 j + 1, 

166 n_refs, 

167 name, 

168 i + 1, 

169 n_connections, 

170 ) 

171 else: 

172 val = self._get(ref) 

173 periodic.log( 

174 "Retrieved dataset for connection '%s' (%d out of %d)", 

175 name, 

176 i + 1, 

177 n_connections, 

178 ) 

179 n_retrieved += 1 

180 retVal[name] = val 

181 if periodic.num_issued > 0: 

182 # This took long enough that we issued some periodic log 

183 # messages, so issue a final confirmation message as well. 

184 _LOG.verbose( 

185 "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections 

186 ) 

187 return retVal 

188 elif isinstance(dataset, list): 

189 n_datasets = len(dataset) 

190 retrieved = [] 

191 for i, x in enumerate(dataset): 

192 # Mypy is not sure of the type of x because of the union 

193 # of lists so complains. Ignoring it is more efficient 

194 # than adding an isinstance assert. 

195 retrieved.append(self._get(x)) # type: ignore 

196 periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets) 

197 if periodic.num_issued > 0: 

198 _LOG.verbose("Completed retrieval of %d datasets", n_datasets) 

199 return retrieved 

200 elif isinstance(dataset, DatasetRef) or isinstance(dataset, DeferredDatasetRef): 

201 return self._get(dataset) 

202 else: 

203 raise TypeError( 

204 f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get" 

205 ) 

206 

207 def put( 

208 self, 

209 values: Union[Struct, List[Any], Any], 

210 dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef], 

211 ) -> None: 

212 """Puts data into the butler 

213 

214 Parameters 

215 ---------- 

216 values : `Struct` or `list` of `object` or `object` 

217 The data that should be put with the butler. If the type of the 

218 dataset is `OutputQuantizedConnection` then this argument should be 

219 a `Struct` with corresponding attribute names. Each attribute 

220 should then correspond to either a list of object or a single 

221 object depending of the type of the corresponding attribute on 

222 dataset. I.e. if ``dataset.calexp`` is 

223 ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be 

224 ``[calexp1, calexp2]``. Like wise if there is a single ref, then 

225 only a single object need be passed. The same restriction applies 

226 if dataset is directly a `list` of `DatasetRef` or a single 

227 `DatasetRef`. 

228 dataset 

229 This argument may either be an `InputQuantizedConnection` which 

230 describes all the inputs of a quantum, a list of 

231 `lsst.daf.butler.DatasetRef`, or a single 

232 `lsst.daf.butler.DatasetRef`. The function will get and return 

233 the corresponding datasets from the butler. 

234 

235 Raises 

236 ------ 

237 ValueError 

238 Raised if a `DatasetRef` is passed to put that is not defined in 

239 the quantum object, or the type of values does not match what is 

240 expected from the type of dataset. 
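
        Examples
        --------
        An illustrative sketch; ``outputCatalog`` stands in for a
        single-dataset output connection defined by the task::

            # Put everything at once using the Struct returned by run();
            # its attribute names must match the output connection names.
            butlerQC.put(outputs, outputRefs)

            # Or put one object for a single output reference.
            butlerQC.put(catalog, outputRefs.outputCatalog)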

241 """ 

242 if isinstance(dataset, OutputQuantizedConnection): 

243 if not isinstance(values, Struct): 

244 raise ValueError( 

245 "dataset is a OutputQuantizedConnection, a Struct with corresponding" 

246 " attributes must be passed as the values to put" 

247 ) 

248 for name, refs in dataset: 

249 valuesAttribute = getattr(values, name) 

250 if isinstance(refs, list): 

251 if len(refs) != len(valuesAttribute): 

252 raise ValueError(f"There must be a object to put for every Dataset ref in {name}") 

253 for i, ref in enumerate(refs): 

254 self._put(valuesAttribute[i], ref) 

255 else: 

256 self._put(valuesAttribute, refs) 

257 elif isinstance(dataset, list): 

258 if not isinstance(values, Sequence): 

259 raise ValueError("Values to put must be a sequence") 

260 if len(dataset) != len(values): 

261 raise ValueError("There must be a common number of references and values to put") 

262 for i, ref in enumerate(dataset): 

263 self._put(values[i], ref) 

264 elif isinstance(dataset, DatasetRef): 

265 self._put(values, dataset) 

266 else: 

267 raise TypeError("Dataset argument is not a type that can be used to put") 

268 

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check if one or more `DatasetRef` objects are part of the quantum.

        This internal function will raise an exception if the
        `ButlerQuantumContext` is used to get/put a `DatasetRef` which is not
        defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list of `DatasetRef` or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types from needing to be checked for every
            operation, which may be important for quanta with many
            `DatasetRef` objects.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")