Coverage for python/lsst/pipe/base/butlerQuantumContext.py: 13%

98 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-11 02:00 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Module defining a butler like object specialized to a specific quantum."""

# NOTE: the docstring has to be the first statement of the module to be
# recorded as ``__doc__``; a ``__future__`` import may legally follow it.
from __future__ import annotations

26 

27__all__ = ("ButlerQuantumContext",) 

28 

29from collections.abc import Sequence 

30from typing import Any 

31 

32from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum 

33from lsst.utils.introspection import get_full_type_name 

34from lsst.utils.logging import PeriodicLogger, getLogger 

35 

36from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

37from .struct import Struct 

38 

39_LOG = getLogger(__name__) 

40 

41 

class ButlerQuantumContext:
    """A Butler-like class specialized for a single quantum.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.

    Notes
    -----
    A ButlerQuantumContext class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    def __init__(self, butler: LimitedButler, quantum: Quantum):
        self.quantum = quantum
        # Membership checks are keyed on (datasetType, dataId) pairs so that a
        # ref can be validated with a single set lookup.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Fetch a single dataset after checking it is a quantum input.

        Parameters
        ----------
        ref : `DeferredDatasetRef`, `~lsst.daf.butler.DatasetRef` or `None`
            Reference to fetch. `None` is passed through untouched.

        Returns
        -------
        return : `object`
            `None` if ``ref`` is `None`; the result of the butler's
            ``getDeferred`` for a `DeferredDatasetRef`; otherwise the result
            of the butler's ``get``.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the inputs of the quantum.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store a single dataset after checking it is a quantum output.

        Parameters
        ----------
        value : `object`
            Object handed to the butler's ``put``.
        ref : `~lsst.daf.butler.DatasetRef`
            Reference under which ``value`` is stored.

        Raises
        ------
        ValueError
            Raised if the reference is not one of the outputs of the quantum.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset : `InputQuantizedConnection`, `list`, \
                `~lsst.daf.butler.DatasetRef`, `DeferredDatasetRef` or `None`
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`), or a
            single `~lsst.daf.butler.DatasetRef` (or `DeferredDatasetRef`).
            The function will get and return the corresponding datasets from
            the butler. If `None` is passed in place of a reference then the
            corresponding returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` then the return type will be a list of objects. If the
            input argument is a single reference then a single object will be
            returned. A `DeferredDatasetRef` yields whatever the butler's
            ``getDeferred`` returns instead of the dataset itself.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    # Multiple connection: fetch each ref, preserving order.
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # The union of list types in the signature leaves the element
                # type ambiguous to mypy; no runtime narrowing is done here
                # because _get already dispatches on the element type.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, (DatasetRef, DeferredDatasetRef)) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset : `OutputQuantizedConnection`, `list` of \
                `~lsst.daf.butler.DatasetRef` or `~lsst.daf.butler.DatasetRef`
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will store ``values``
            in the butler under the corresponding references.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    # The attribute is not validated as a Sequence, so it is
                    # indexed rather than iterated.
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            # Lengths were checked above, so pairing with zip drops nothing.
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            # Report the offending type, matching the message issued by get().
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to put"
            )

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `ButlerQuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.

        Raises
        ------
        ValueError
            Raised if the ``(datasetType, dataId)`` pair of any reference is
            not contained in ``inout``.
        """
        if not isinstance(ref, list):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions