Coverage for python/lsst/pipe/base/_quantumContext.py: 21%

147 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:56 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

"""Module defining a Butler-like object specialized to the execution of a
single quantum, along with a description of the resources available to it.
"""

31 

32from __future__ import annotations 

33 

34__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext") 

35 

36import numbers 

37from collections.abc import Callable, Sequence 

38from dataclasses import dataclass 

39from typing import Any 

40 

41import astropy.units as u 

42from deprecated.sphinx import deprecated 

43from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum 

44from lsst.utils.introspection import get_full_type_name 

45from lsst.utils.logging import PeriodicLogger, getLogger 

46 

47from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

48from .struct import Struct 

49 

50_LOG = getLogger(__name__) 

51 

52 

@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. May be given as a
        byte-compatible `~astropy.units.Quantity`, a plain number, a string
        containing a plain number, or a string representing a quantity.
        `None` means no limit.
    default_mem_units : `astropy.units.Unit`, optional
        The unit assumed when ``max_mem`` is supplied as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task.
    """

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # The dataclass is frozen with a constrained field definition, so a
        # hand-written __init__ is used to normalize the flexible inputs
        # before the attributes are frozen via object.__setattr__.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")
        object.__setattr__(self, "num_cores", num_cores)

        mem: u.Quantity | None = None
        if isinstance(max_mem, u.Quantity):
            mem = max_mem
        elif max_mem is None or max_mem == "":
            # No limit requested. Some command line tooling passes an empty
            # string when no value was given, so treat it like None.
            mem = None
        else:
            try:
                # A plain number, or a string holding one: attach the
                # default memory units.
                mem = float(max_mem) * default_mem_units
            except ValueError:
                # Anything else (e.g. "4GiB") is handed to astropy to parse.
                mem = u.Quantity(max_mem)

        if mem is not None:
            # Normalize to bytes; this also validates that the supplied
            # quantity is byte-convertible.
            mem = mem.to(u.B)
        object.__setattr__(self, "max_mem", mem)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Return ``self``; the class is frozen so copies are unnecessary."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments `__reduce__` should pickle.

        The dataclass constructor is keyword-only, and we prefer the pickle
        to carry a plain integer byte count rather than a full
        `~astropy.units.Quantity`.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kwargs: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float; cast to a python int because
            # fractional bytes make no sense. The constructor guarantees
            # the stored units are bytes, so no conversion is needed here.
            kwargs["max_mem"] = int(self.max_mem.value)
        return kwargs

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Reconstruct an instance from a factory callable.

        Exists so that `__reduce__` can unpickle with keyword arguments,
        which the plain pickle protocol does not support directly.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickle via the keyword-argument factory."""
        return self._unpickle_via_factory, (type(self), [], self._reduce_kwargs())

162 

163 

class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. What this means
    in practice is that the only gets and puts that this class allows
    are DatasetRefs that are contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put. This is in contrast to what the
    preflight expects to be get and put by looking at the graph before
    execution.
    """

    # The execution resources (cores/memory) available to the running task.
    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        if resources is None:
            # Default: a single core with no memory limit.
            resources = ExecutionResources()
        self.resources = resources

        # Index every (datasetType, dataId) pair declared by the quantum so
        # _checkMembership can cheaply reject refs outside the quantum.
        self.allInputs = set()
        self.allOutputs = set()
        for refs in quantum.inputs.values():
            for ref in refs:
                self.allInputs.add((ref.datasetType, ref.dataId))
        for refs in quantum.outputs.values():
            for ref in refs:
                self.allOutputs.add((ref.datasetType, ref.dataId))
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        """Fetch one dataset (or a deferred handle) after a membership check.

        A `None` ref yields `None`, mirroring optional connections.
        """
        # Butler methods below will check for unresolved DatasetRefs and
        # raise appropriately, so no need for us to do that here.
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        elif ref is None:
            return None
        else:
            self._checkMembership(ref, self.allInputs)
            return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler after checking output membership."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed in
            place of a `~lsst.daf.butler.DatasetRef` then the corresponding
            returned object will be `None`.

        Returns
        -------
        return : `object`
            This function returns arbitrary objects fetched from the butler.
            The structure these objects are returned in depends on the type of
            the input argument. If the input dataset argument is a
            `InputQuantizedConnection`, then the return type will be a
            dictionary with keys corresponding to the attributes of the
            `InputQuantizedConnection` (which in turn are the attribute
            identifiers of the connections). If the input argument is of type
            `list` of `~lsst.daf.butler.DatasetRef` then the return type will
            be a list of objects. If the input argument is a single
            `~lsst.daf.butler.DatasetRef` then a single object will be
            returned.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is
            not defined in the quantum object.
        """
        # Set up a periodic logger so log messages can be issued if things
        # are taking too long.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            retVal = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for i, (name, ref) in enumerate(dataset):
                if isinstance(ref, list | tuple):
                    # Multiple refs for this connection: fetch one by one,
                    # reporting progress within and across connections.
                    val = []
                    n_refs = len(ref)
                    for j, r in enumerate(ref):
                        val.append(self._get(r))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            j + 1,
                            n_refs,
                            name,
                            i + 1,
                            n_connections,
                        )
                else:
                    val = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        i + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                retVal[name] = val
            if periodic.num_issued > 0:
                # This took long enough that we issued some periodic log
                # messages, so issue a final confirmation message as well.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return retVal
        elif isinstance(dataset, list | tuple):
            n_datasets = len(dataset)
            retrieved = []
            for i, x in enumerate(dataset):
                # Mypy is not sure of the type of x because of the union
                # of lists so complains. Ignoring it is more efficient
                # than adding an isinstance assert.
                retrieved.append(self._get(x))
                periodic.log("Retrieved %d out of %d datasets", i + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif isinstance(dataset, DatasetRef | DeferredDatasetRef) or dataset is None:
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should be
            a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of object or a single
            object depending of the type of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``. Like wise if there is a single ref, then
            only a single object need be passed. The same restriction applies
            if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
            or a single `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `OutputQuantizedConnection` which
            describes all the outputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The function will put the
            corresponding datasets into the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is
            not defined in the `~lsst.daf.butler.Quantum` object, or the type
            of values does not match what is expected from the type of dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                valuesAttribute = getattr(values, name)
                if isinstance(refs, list | tuple):
                    # One stored object is required per ref in the connection.
                    if len(refs) != len(valuesAttribute):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for i, ref in enumerate(refs):
                        self._put(valuesAttribute[i], ref)
                else:
                    self._put(valuesAttribute, refs)
        elif isinstance(dataset, list | tuple):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for i, ref in enumerate(dataset):
                self._put(values[i], ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every operation,
            which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        if not isinstance(ref, list | tuple):
            ref = [ref]
        for r in ref:
            if (r.datasetType, r.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions

423 

424 

# TODO: remove on DM-40063.
@deprecated(
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v26.",
    version="v26",
    category=FutureWarning,
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated alias kept for backward compatibility; use
    `QuantumContext` instead.
    """