Coverage for python/lsst/pipe/base/_quantumContext.py: 19%

144 statements  

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining a butler-like object specialized to a single quantum and
a description of the execution resources available to it.
"""

from __future__ import annotations

__all__ = ("ExecutionResources", "QuantumContext")

import numbers
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any

import astropy.units as u
from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. May be specified as a
        byte-compatible `~astropy.units.Quantity`, a plain number, a string
        containing a plain number, or a string representing a quantity. If
        `None`, no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given as a
        plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task."""

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # Create our own __init__ to allow more flexible input parameters
        # but with a constrained dataclass definition.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")

        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None = None

        if max_mem is None or isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem == "":
            # Some command-line tooling can treat no value as an empty string.
            pass
        else:
            parsed_mem = None
            try:
                parsed_mem = float(max_mem)
            except ValueError:
                pass
            else:
                mem = parsed_mem * default_mem_units

            if mem is None:
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            mem = mem.to(u.B)

        object.__setattr__(self, "max_mem", mem)
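
    # Illustrative sketch (not part of the original module): the forms that
    # ``max_mem`` accepts, following the parsing logic in ``__init__`` above.
    # The specific values are assumptions chosen for the example.
    #
    #     ExecutionResources(num_cores=4, max_mem=1_000_000)  # plain number, default unit (bytes)
    #     ExecutionResources(max_mem="512")                   # numeric string, default unit (bytes)
    #     ExecutionResources(max_mem="2GB")                   # string quantity, converted to bytes
    #     ExecutionResources(max_mem=1024 * u.B)              # astropy Quantity, converted to bytes
    #     ExecutionResources(max_mem="")                      # empty string means no limit (None)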

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Deep copy returns itself because the class is frozen."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to store only a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kwargs: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float. Cast it to a python int since we
            # do not want fractional bytes. The constructor ensures that
            # this uses units of bytes so we do not have to convert.
            kwargs["max_mem"] = int(self.max_mem.value)
        return kwargs

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickling via `__reduce__` with keyword arguments as well
        as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())
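

# Illustrative sketch (not part of the original module): pickling round-trips
# through ``_unpickle_via_factory`` above, storing ``max_mem`` as a plain
# integer number of bytes rather than as a Quantity.
#
#     import pickle
#
#     res = ExecutionResources(num_cores=2, max_mem="1MB")
#     restored = pickle.loads(pickle.dumps(res))
#     assert restored == res  # max_mem survives as a byte Quantity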


class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by
        a single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. In practice this means
    that the only gets and puts this class allows are for dataset references
    that are contained in the quantum.

    In the future this class will also be used to record provenance on what
    was actually retrieved and stored, in contrast to what preflight expects
    to be retrieved and stored from inspecting the graph before execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            resources = ExecutionResources()
        self.resources = resources

        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: (
            InputQuantizedConnection
            | list[DatasetRef | None]
            | list[DeferredDatasetRef | None]
            | DatasetRef
            | DeferredDatasetRef
            | None
        ),
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset : see description
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed
            in place of a `~lsst.daf.butler.DatasetRef` then the
            corresponding returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type
            of the input argument. If the input dataset argument is an
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of
            type `list` of `~lsst.daf.butler.DatasetRef` then the return
            type will be a list of objects. If the input argument is a
            single `~lsst.daf.butler.DatasetRef` then a single object will
            be returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that
            is not defined in the quantum object.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list | tuple):
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list | tuple):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )
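
    # Illustrative sketch (not part of the original module): the three call
    # forms ``get`` accepts. ``butlerQC``, ``inputRefs``, and the connection
    # names are assumptions chosen for the example.
    #
    #     everything = butlerQC.get(inputRefs)         # InputQuantizedConnection -> dict by name
    #     exposures = butlerQC.get(inputRefs.calexps)  # list of refs -> list of objects
    #     catalog = butlerQC.get(inputRefs.catalog)    # single ref -> single object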

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction
            applies if dataset is directly a `list` of
            `~lsst.daf.butler.DatasetRef` or a single
            `~lsst.daf.butler.DatasetRef`. If ``values.NAME`` is `None`, no
            output is written.
        dataset : `OutputQuantizedConnection` or `list` [`DatasetRef`] \
                or `DatasetRef`
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding data with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that
            is not defined in the `~lsst.daf.butler.Quantum` object, or the
            type of values does not match what is expected from the type of
            dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is an OutputQuantizedConnection, so a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                if (valuesAttribute := getattr(values, name, None)) is None:
                    continue
                if isinstance(refs, list | tuple):
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be an object to put for every DatasetRef in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list | tuple):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")
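
    # Illustrative sketch (not part of the original module): putting outputs
    # through a Struct whose attribute names match the output connections.
    # ``butlerQC``, ``outputRefs``, and ``catalog`` are assumptions.
    #
    #     butlerQC.put(Struct(catalog=output_catalog), outputRefs)  # all declared outputs
    #     butlerQC.put(output_catalog, outputRefs.catalog)          # single-ref form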

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [`~lsst.daf.butler.DatasetRef`] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions
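

# Illustrative sketch (not part of the original module): the typical use of a
# QuantumContext inside ``PipelineTask.runQuantum``, which the execution
# framework calls with a context plus the quantized input and output
# connections. ``MyTask`` and its connections are assumptions.
#
#     class MyTask(PipelineTask):
#         def runQuantum(self, butlerQC, inputRefs, outputRefs):
#             inputs = butlerQC.get(inputRefs)   # dict keyed by connection name
#             outputs = self.run(**inputs)       # returns a Struct
#             butlerQC.put(outputs, outputRefs)  # store all declared outputs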