Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 12%

97 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-06 02:51 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Module defining a butler like object specialized to a specific quantum. 

25""" 

26 

27__all__ = ("ButlerQuantumContext",) 

28 

29from typing import Any, List, Optional, Sequence, Union 

30 

31from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum 

32from lsst.utils.introspection import get_full_type_name 

33from lsst.utils.logging import PeriodicLogger, getLogger 

34 

35from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

36from .struct import Struct 

37 

38_LOG = getLogger(__name__) 

39 

40 

class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.

    Attributes
    ----------
    quantum : `lsst.daf.butler.core.Quantum`
        The quantum passed at construction.
    allInputs : `set`
        ``(datasetType, dataId)`` pairs for every input ref of the quantum.
    allOutputs : `set`
        ``(datasetType, dataId)`` pairs for every output ref of the quantum.

    Notes
    -----
    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    def __init__(self, butler: LimitedButler, quantum: Quantum):
        self.quantum = quantum
        # Membership is tracked as (datasetType, dataId) pairs so that the
        # per-operation check in _checkMembership is a single set lookup.
        self.allInputs = {(ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs}
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__butler = butler

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        """Retrieve a single dataset after checking it is a quantum input.

        Parameters
        ----------
        ref : `DeferredDatasetRef`, `~lsst.daf.butler.DatasetRef` or `None`
            Reference to retrieve. `None` is passed through unchanged.

        Returns
        -------
        value : `object`
            `None` if ``ref`` is `None`; the result of the butler's
            ``getDeferred`` for a `DeferredDatasetRef`; otherwise the result
            of the butler's ``get``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's inputs.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is a quantum output.

        Parameters
        ----------
        value : `object`
            The object to store.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the quantum's outputs.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: Union[
            InputQuantizedConnection,
            List[Optional[DatasetRef]],
            List[Optional[DeferredDatasetRef]],
            DatasetRef,
            DeferredDatasetRef,
            None,
        ],
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`), or a
            single `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`).
            The function will get and return the corresponding datasets from
            the butler. If `None` is passed in place of a
            `~lsst.daf.butler.DatasetRef` then the corresponding returned
            object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to get that is not defined in
            the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Union[Struct, List[Any], Any],
        dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef],
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `DatasetRef` or a single
            `DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding ``values`` into the butler under these references.

        Raises
        ------
        ValueError
            Raised if a `DatasetRef` is passed to put that is not defined in
            the quantum object, or the type of values does not match what is
            expected from the type of dataset.
        AttributeError
            Raised if ``dataset`` is an `OutputQuantizedConnection` and
            ``values`` lacks an attribute named after one of its connections.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            # Report the offending type, mirroring the error raised by get().
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to put"
            )

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        """Check that one or more `DatasetRef` belong to the quantum.

        This function will raise an exception if the ButlerQuantumContext is
        used to get/put a DatasetRef which is not defined in the quantum.

        Parameters
        ----------
        ref : `list` of `DatasetRef` or `DatasetRef`
            Either a list or a single `DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of `DatasetRef`.

        Raises
        ------
        ValueError
            Raised if any checked reference's ``(datasetType, dataId)`` pair
            is not in ``inout``.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                # Name the offending ref so the failure can be diagnosed.
                raise ValueError(f"DatasetRef is not part of the Quantum being processed: {r}")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        return self.__butler.dimensions