Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 11%

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Module defining a butler-like object specialized to a specific quantum.
"""

__all__ = ("ButlerQuantumContext",)

from typing import Any, List, Optional, Sequence, Union

from lsst.daf.butler import Butler, DatasetRef, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    A ButlerQuantumContext wraps a standard butler interface and
    specializes it to the context of a given quantum. In practice this
    means that the only gets and puts this class allows are for dataset
    references that are contained in the quantum.

    In the future this class will also be used to record provenance for
    what was actually retrieved and written, in contrast to what preflight
    expects to be retrieved and written from inspecting the graph before
    execution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler object from/to which datasets will be retrieved and stored.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be retrieved
        and stored by a single execution of this node in the pipeline
        graph. All input dataset references must be resolved (i.e. satisfy
        ``DatasetRef.id is not None``) prior to constructing the
        `ButlerQuantumContext`.

    Notes
    -----
    Most quanta in any non-trivial graph will not start with resolved
    dataset references, because they represent processing steps that can
    only run after some other quanta have produced their inputs. At
    present, it is the responsibility of
    ``lsst.ctrl.mpexec.SingleQuantumExecutor`` to resolve all datasets
    prior to constructing `ButlerQuantumContext` and calling `runQuantum`,
    and the fact that this precondition is satisfied by code in a
    downstream package is considered a problem with the
    ``pipe_base/ctrl_mpexec`` separation of concerns that will be
    addressed in the future.
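
    Examples
    --------
    A minimal sketch of how a `PipelineTask` typically uses this object
    inside its ``runQuantum`` method; the ``run`` call and the contents of
    the connections are illustrative only::

        def runQuantum(self, butlerQC, inputRefs, outputRefs):
            # Retrieve every input declared in the connections as a dict
            # keyed by connection name.
            inputs = butlerQC.get(inputRefs)
            # Produce the in-memory outputs, typically a Struct whose
            # attribute names match the output connection names.
            outputs = self.run(**inputs)
            # Store every output declared in the connections.
            butlerQC.put(outputs, outputRefs)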

76 """ 

77 

    def __init__(self, butler: Butler, quantum: Quantum):
        self.quantum = quantum
        self.registry = butler.registry
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDirectDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.getDirect(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[Optional[DatasetRef]],
            List[Optional[DeferredDatasetRef]],
            DatasetRef,
            DeferredDatasetRef,
            None,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may be either an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed
            in place of a `~lsst.daf.butler.DatasetRef` then the
            corresponding returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the
            butler. The structure these objects are returned in depends on
            the type of the input argument. If the input dataset argument
            is an `InputQuantizedConnection`, then the return type will be
            a dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of
            type `list` of `~lsst.daf.butler.DatasetRef` then the return
            type will be a list of objects. If the input argument is a
            single `~lsst.daf.butler.DatasetRef` then a single object will
            be returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined
            in the quantum object.
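
        Examples
        --------
        A sketch of typical calls, assuming ``butlerQC`` is this object,
        ``inputRefs`` is the `InputQuantizedConnection` for the quantum,
        and ``exposures`` is the (illustrative) name of one of its input
        connections::

            # Retrieve everything at once as a dict keyed by connection
            # name.
            inputs = butlerQC.get(inputRefs)
            exposures = inputs["exposures"]

            # Or retrieve the datasets of a single connection; a list of
            # refs returns a list of objects.
            exposures = butlerQC.get(inputRefs.exposures)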

151 """ 

        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef) or isinstance(dataset, DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise, if there is a single ref, then
            only a single object need be passed. The same restriction
            applies if dataset is directly a `list` of `DatasetRef` or a
            single `DatasetRef`.
        dataset
            This argument may be either an `OutputQuantizedConnection`
            which describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding values with the butler.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined
            in the quantum object, or the type of values does not match
            what is expected from the type of dataset.
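
        Examples
        --------
        A sketch of typical calls, assuming ``butlerQC`` is this object,
        ``outputRefs`` is the `OutputQuantizedConnection` for the quantum,
        and ``catalog`` is the (illustrative) name of a single-dataset
        output connection::

            # Store every output at once; the Struct attribute names must
            # match the output connection names.
            butlerQC.put(Struct(catalog=catalog), outputRefs)

            # Or store a single object against its DatasetRef.
            butlerQC.put(catalog, outputRefs.catalog)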

245 """ 

        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is an OutputQuantizedConnection; a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be an object to put for every dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Internal function used to check if a `DatasetRef` is part of the
        input quantum.

        This function will raise an exception if the `ButlerQuantumContext`
        is used to get/put a `DatasetRef` which is not defined in the
        quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an
            output. This prevents both types needing to be checked for
            every operation, which may be important for quanta with many
            dataset references.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")