Coverage for python/lsst/pipe/base/_quantumContext.py: 19%

143 statements  

coverage.py v7.4.1, created at 2024-01-30 10:51 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining a butler-like class specialized for a single quantum,
along with a description of the execution resources available to it.
"""

from __future__ import annotations

__all__ = ("ExecutionResources", "QuantumContext")

import numbers
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any

import astropy.units as u
from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum
from lsst.utils.introspection import get_full_type_name
from lsst.utils.logging import PeriodicLogger, getLogger

from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection
from .struct import Struct

_LOG = getLogger(__name__)


@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified
        as a byte-compatible `~astropy.units.Quantity`, a plain number,
        a string with a plain number, or a string representing a quantity.
        If `None`, no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
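
    Examples
    --------
    A minimal sketch of the accepted ``max_mem`` forms; the values shown
    are illustrative:

    >>> import astropy.units as u
    >>> res = ExecutionResources(num_cores=4, max_mem=1024)
    >>> res.num_cores, int(res.max_mem.value)
    (4, 1024)
    >>> int(ExecutionResources(max_mem="2 GB").max_mem.value)
    2000000000
    >>> int(ExecutionResources(max_mem=2048 * u.B).max_mem.value)
    2048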

69 """ 

70 

71 num_cores: int = 1 

72 """The maximum number of cores that the task can use.""" 

73 

74 max_mem: u.Quantity | None = None 

75 """If defined, the amount of memory allocated to the task. 

76 """ 

77 

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # Create our own __init__ to allow more flexible input parameters
        # but with a constrained dataclass definition.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")

        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None = None

        if max_mem is None or isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem == "":
            # Some command line tooling can treat no value as empty string.
            pass
        else:
            parsed_mem = None
            try:
                parsed_mem = float(max_mem)
            except ValueError:
                pass
            else:
                mem = parsed_mem * default_mem_units

            if mem is None:
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            mem = mem.to(u.B)

        object.__setattr__(self, "max_mem", mem)

116 

117 def __deepcopy__(self, memo: Any) -> ExecutionResources: 

118 """Deep copy returns itself because the class is frozen.""" 

119 return self 

120 

121 def _reduce_kwargs(self) -> dict[str, Any]: 

122 """Return a dict of the keyword arguments that should be used 

123 by `__reduce__`. 

124 

125 This is necessary because the dataclass is defined to be keyword 

126 only and we wish the default pickling to only store a plain number 

127 for the memory allocation and not a large Quantity. 

128 

129 Returns 

130 ------- 

131 kwargs : `dict` 

132 Keyword arguments to be used when pickling. 
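
        Examples
        --------
        A pickle round trip exercises this path; the values are
        illustrative:

        >>> import pickle
        >>> res = ExecutionResources(num_cores=2, max_mem=512)
        >>> pickle.loads(pickle.dumps(res)) == res
        True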

133 """ 

134 kwargs: dict[str, Any] = {"num_cores": self.num_cores} 

135 if self.max_mem is not None: 

136 # .value is a numpy float. Cast it to a python int since we 

137 # do not want fractional bytes. The constructor ensures that this 

138 # uses units of byte so we do not have to convert. 

139 kwargs["max_mem"] = int(self.max_mem.value) 

140 return kwargs 

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())


class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` wraps a standard butler interface and specializes it
    to the context of a given quantum. In practice this means that the only
    gets and puts this class allows are of `~lsst.daf.butler.DatasetRef`
    instances that are contained in the quantum.

    In the future this class will also be used to record provenance for what
    was actually retrieved and stored, in contrast to what preflight expects
    to be retrieved and stored based on inspecting the graph before
    execution.
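
    Examples
    --------
    A sketch of typical use inside `PipelineTask.runQuantum`, where the
    execution framework supplies the context and the connection containers
    (``self.run`` and the connection contents depend on the concrete task)::

        def runQuantum(self, butlerQC, inputRefs, outputRefs):
            inputs = butlerQC.get(inputRefs)
            outputs = self.run(**inputs)
            butlerQC.put(outputs, outputRefs)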

188 """ 

189 

190 resources: ExecutionResources 

191 

192 def __init__( 

193 self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None 

194 ): 

195 self.quantum = quantum 

196 if resources is None: 

197 resources = ExecutionResources() 

198 self.resources = resources 

199 

200 self.allInputs = set() 

201 self.allOutputs = set() 

202 for refs in quantum.inputs.values(): 

203 for ref in refs: 

204 self.allInputs.add((ref.datasetType, ref.dataId)) 

205 for refs in quantum.outputs.values(): 

206 for ref in refs: 

207 self.allOutputs.add((ref.datasetType, ref.dataId)) 

208 self.__butler = butler 

209 

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset : see description
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is an
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
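
        Examples
        --------
        A sketch of the three call styles; ``butlerQC`` is this context as
        seen inside ``runQuantum`` and ``calexp`` is a hypothetical
        connection name::

            inputs = butlerQC.get(inputRefs)         # dict keyed by connection name
            images = butlerQC.get(inputRefs.calexp)  # list, from a list of refs
            image = butlerQC.get(ref)                # single object from one ref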

269 """ 

270 # Set up a periodic logger so log messages can be issued if things 

271 # are taking too long. 

272 periodic = PeriodicLogger(_LOG) 

273 

274 if isinstance(dataset, InputQuantizedConnection): 

275 retVal = {} 

276 n_connections = len(dataset) 

277 n_retrieved = 0 

278 for i, (name, ref) in enumerate(dataset): 

279 if isinstance(ref, list | tuple): 

280 val = [] 

281 n_refs = len(ref) 

282 for j, r in enumerate(ref): 

283 val.append(self._get(r)) 

284 n_retrieved += 1 

285 periodic.log( 

286 "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)", 

287 j + 1, 

288 n_refs, 

289 name, 

290 i + 1, 

291 n_connections, 

292 ) 

293 else: 

294 val = self._get(ref) 

295 periodic.log( 

296 "Retrieved dataset for connection '%s' (%d out of %d)", 

297 name, 

298 i + 1, 

299 n_connections, 

300 ) 

301 n_retrieved += 1 

302 retVal[name] = val 

303 if periodic.num_issued > 0: 

304 # This took long enough that we issued some periodic log 

305 # messages, so issue a final confirmation message as well. 

306 _LOG.verbose( 

307 "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections 

308 ) 

309 return retVal 

310 elif isinstance(dataset, list | tuple): 

311 n_datasets = len(dataset) 

312 retrieved = [] 

313 for i, x in enumerate(dataset): 

314 # Mypy is not sure of the type of x because of the union 

315 # of lists so complains. Ignoring it is more efficient 

316 # than adding an isinstance assert. 

317 retrieved.append(self._get(x)) 

318 periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets) 

319 if periodic.num_issued > 0: 

320 _LOG.verbose("Completed retrieval of %d datasets", n_datasets) 

321 return retrieved 

322 elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None: 

323 return self._get(dataset) 

324 else: 

325 raise TypeError( 

326 f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get" 

327 ) 

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, depending on the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Likewise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset : `OutputQuantizedConnection` or `list`[`DatasetRef`] \
                or `DatasetRef`
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will store the
            corresponding values with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of
            dataset.
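
        Examples
        --------
        A sketch of the call styles; ``outputs`` is the `Struct` returned by
        a task's ``run`` method, and ``calexp`` is a hypothetical
        single-dataset connection name::

            butlerQC.put(outputs, outputRefs)       # Struct and full connection
            butlerQC.put([imageA, imageB], refs)    # parallel sequences
            butlerQC.put(image, outputRefs.calexp)  # single object and ref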

364 """ 

365 if isinstance(dataset, OutputQuantizedConnection): 

366 if not isinstance(values, Struct): 

367 raise ValueError( 

368 "dataset is a OutputQuantizedConnection, a Struct with corresponding" 

369 " attributes must be passed as the values to put" 

370 ) 

371 for name, refs in dataset: 

372 valuesAttribute = getattr(values, name) 

373 if isinstance(refs, list | tuple): 

374 if len(refs) != len(valuesAttribute): 

375 raise ValueError(f"There must be a object to put for every Dataset ref in {name}") 

376 for i, ref in enumerate(refs): 

377 self._put(valuesAttribute[i], ref) 

378 else: 

379 self._put(valuesAttribute, refs) 

380 elif isinstance(dataset, list | tuple): 

381 if not isinstance(values, Sequence): 

382 raise ValueError("Values to put must be a sequence") 

383 if len(dataset) != len(values): 

384 raise ValueError("There must be a common number of references and values to put") 

385 for i, ref in enumerate(dataset): 

386 self._put(values[i], ref) 

387 elif isinstance(dataset, DatasetRef): 

388 self._put(values, dataset) 

389 else: 

390 raise TypeError("Dataset argument is not a type that can be used to put") 

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions