Coverage for python/lsst/pipe/base/_quantumContext.py: 20%

147 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-25 09:14 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Module defining a butler like object specialized to a specific quantum. 

25""" 

26 

27__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext") 

28 

29import numbers 

30from collections.abc import Callable, Sequence 

31from dataclasses import dataclass 

32from typing import Any 

33 

34import astropy.units as u 

35from deprecated.sphinx import deprecated 

36from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum 

37from lsst.utils.introspection import get_full_type_name 

38from lsst.utils.logging import PeriodicLogger, getLogger 

39 

40from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

41from .struct import Struct 

42 

43_LOG = getLogger(__name__) 

44 

45 

@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified
        as byte-compatible `~astropy.units.Quantity`, a plain number,
        a string with a plain number, or a string representing a quantity.
        If `None` no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task.
    """

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # A hand-written __init__ lets callers pass flexible forms of
        # max_mem while the frozen dataclass stores a normalized Quantity
        # (or None). Attributes must be set via object.__setattr__ because
        # the dataclass is frozen.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")

        object.__setattr__(self, "num_cores", num_cores)

        resolved: u.Quantity | None = None

        if max_mem is None or isinstance(max_mem, u.Quantity):
            resolved = max_mem
        elif max_mem != "":
            # An empty string (some command line tooling treats "no value"
            # as "") means no limit and leaves resolved as None.
            try:
                numeric = float(max_mem)
            except ValueError:
                # Not a bare number; let astropy parse a unit-bearing
                # string such as "512 MB".
                resolved = u.Quantity(max_mem)
            else:
                resolved = numeric * default_mem_units

        if resolved is not None:
            # Normalize to bytes; this also validates that the given
            # quantity is byte-compatible.
            resolved = resolved.to(u.B)

        object.__setattr__(self, "max_mem", resolved)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Return ``self``; the class is frozen so sharing is safe."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments that `__reduce__` should use.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to only store a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        state: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # The constructor guarantees byte units, so only the plain
            # value needs to be stored. Cast the numpy float to a python
            # int since fractional bytes are not wanted.
            state["max_mem"] = int(self.max_mem.value)
        return state

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Reconstruct an instance by calling a factory.

        Allows unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

155 

156 

class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        # Pre-compute (datasetType, dataId) membership sets so every
        # get/put can cheaply verify the ref belongs to this quantum.
        self.allInputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.inputs.values() for ref in refs
        }
        self.allOutputs = {
            (ref.datasetType, ref.dataId) for refs in quantum.outputs.values() for ref in refs
        }
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Fetch a single dataset (or deferred handle) after checking that
        the ref is a declared input of this quantum.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler after checking that the ref is a declared
        output of this quantum.
        """
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the bulter.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, (DatasetRef, DeferredDatasetRef)) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    # Lengths verified above, so plain zip pairs them safely.
                    for value, ref in zip(valuesAttribute, refs):
                        self._put(value, ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            # Lengths verified above, so plain zip pairs them safely.
            for value, ref in zip(values, dataset):
                self._put(value, ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        refs = ref if isinstance(ref, list) else [ref]
        for r in refs:
            if (r.datasetType, r.dataId) not in inout:
                # Include the offending ref so the failure can be diagnosed.
                raise ValueError(f"DatasetRef ({r}) is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions

416 

417 

@deprecated(
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v27.",
    version="v26",
    category=FutureWarning,
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated alias retained for backwards compatibility; use
    `QuantumContext` instead.
    """