Coverage for python/lsst/pipe/base/_quantumContext.py: 20%

147 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 11:14 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining variants for valid values used to constrain datasets in a 

23graph building query. 

24""" 

25 

26from __future__ import annotations 

27 

28__all__ = ("ButlerQuantumContext", "ExecutionResources", "QuantumContext") 

29 

30import numbers 

31from collections.abc import Callable, Sequence 

32from dataclasses import dataclass 

33from typing import Any 

34 

35import astropy.units as u 

36from deprecated.sphinx import deprecated 

37from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum 

38from lsst.utils.introspection import get_full_type_name 

39from lsst.utils.logging import PeriodicLogger, getLogger 

40 

41from .connections import DeferredDatasetRef, InputQuantizedConnection, OutputQuantizedConnection 

42from .struct import Struct 

43 

44_LOG = getLogger(__name__) 

45 

46 

@dataclass(init=False, frozen=True)
class ExecutionResources:
    """A description of the resources available to a running quantum.

    Parameters
    ----------
    num_cores : `int`, optional
        The number of cores allocated to the task.
    max_mem : `~astropy.units.Quantity`, `numbers.Real`, `str`, or `None`,\
            optional
        The amount of memory allocated to the task. Can be specified
        as a byte-compatible `~astropy.units.Quantity`, a plain number,
        a string with a plain number, or a string representing a quantity.
        If `None` no limit is specified.
    default_mem_units : `astropy.units.Unit`, optional
        The default unit to apply when the ``max_mem`` value is given
        as a plain number.
    """

    num_cores: int = 1
    """The maximum number of cores that the task can use."""

    max_mem: u.Quantity | None = None
    """If defined, the amount of memory allocated to the task."""

    def __init__(
        self,
        *,
        num_cores: int = 1,
        max_mem: u.Quantity | numbers.Real | str | None = None,
        default_mem_units: u.Unit = u.B,
    ):
        # A hand-written __init__ lets callers supply flexible memory
        # specifications while the frozen dataclass stores one normalized
        # Quantity (or None). Frozen instances require object.__setattr__.
        if num_cores < 1:
            raise ValueError("The number of cores must be a positive integer")
        object.__setattr__(self, "num_cores", num_cores)

        normalized: u.Quantity | None = None
        if max_mem is None or isinstance(max_mem, u.Quantity):
            normalized = max_mem
        elif max_mem == "":
            # Some command line tooling can treat no value as empty string.
            normalized = None
        else:
            try:
                plain_number = float(max_mem)
            except ValueError:
                # Not a bare number; let astropy parse strings that carry
                # their own unit (e.g. "512MB").
                normalized = u.Quantity(max_mem)
            else:
                normalized = plain_number * default_mem_units

        if normalized is not None:
            # Force to bytes. This also checks that we can convert to bytes.
            normalized = normalized.to(u.B)
        object.__setattr__(self, "max_mem", normalized)

    def __deepcopy__(self, memo: Any) -> ExecutionResources:
        """Return ``self``; the class is frozen so a copy is unnecessary."""
        return self

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments to be used by `__reduce__`.

        This is necessary because the dataclass is defined to be keyword
        only and we wish the default pickling to only store a plain number
        for the memory allocation and not a large Quantity.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        kwargs: dict[str, Any] = {"num_cores": self.num_cores}
        if self.max_mem is not None:
            # .value is a numpy float; cast to a python int because we do
            # not want fractional bytes. The constructor guarantees byte
            # units, so no conversion is required here.
            kwargs["max_mem"] = int(self.max_mem.value)
        return kwargs

    @staticmethod
    def _unpickle_via_factory(
        cls: type[ExecutionResources], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> ExecutionResources:
        """Unpickle something by calling a factory.

        Allows unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return cls(**kwargs)

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[type[ExecutionResources], Sequence[Any], dict[str, Any]], ExecutionResources],
        tuple[type[ExecutionResources], Sequence[Any], dict[str, Any]],
    ]:
        """Pickle via the keyword-argument factory."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

156 

157 

class QuantumContext:
    """A Butler-like class specialized for a single quantum along with
    context information that can influence how the task is executed.

    Parameters
    ----------
    butler : `lsst.daf.butler.LimitedButler`
        Butler object from/to which datasets will be get/put.
    quantum : `lsst.daf.butler.core.Quantum`
        Quantum object that describes the datasets which will be get/put by a
        single execution of this node in the pipeline graph.
    resources : `ExecutionResources`, optional
        The resources allocated for executing quanta.

    Notes
    -----
    A `QuantumContext` class wraps a standard butler interface and
    specializes it to the context of a given quantum. In practice this means
    the only gets and puts this class allows are of
    `~lsst.daf.butler.DatasetRef`\\ s contained in the quantum.

    In the future this class will also be used to record provenance on
    what was actually get and put, in contrast to what the preflight
    expects by inspecting the graph before execution.
    """

    resources: ExecutionResources

    def __init__(
        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
    ):
        self.quantum = quantum
        self.resources = ExecutionResources() if resources is None else resources

        # Record every (datasetType, dataId) pair declared by the quantum so
        # membership can be validated cheaply on each get/put.
        self.allInputs = set()
        self.allOutputs = set()
        for input_refs in quantum.inputs.values():
            self.allInputs.update((single.datasetType, single.dataId) for single in input_refs)
        for output_refs in quantum.outputs.values():
            self.allOutputs.update((single.datasetType, single.dataId) for single in output_refs)
        self.__butler = butler

    def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
        # The butler methods called below reject unresolved DatasetRefs
        # themselves, so no extra validation is needed here.
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            self._checkMembership(ref.datasetRef, self.allInputs)
            return self.__butler.getDeferred(ref.datasetRef)
        self._checkMembership(ref, self.allInputs)
        return self.__butler.get(ref)

    def _put(self, value: Any, ref: DatasetRef) -> None:
        """Store data in butler after validating quantum membership."""
        self._checkMembership(ref, self.allOutputs)
        self.__butler.put(value, ref)

    def get(
        self,
        dataset: InputQuantizedConnection
        | list[DatasetRef | None]
        | list[DeferredDatasetRef | None]
        | DatasetRef
        | DeferredDatasetRef
        | None,
    ) -> Any:
        """Fetch data from the butler.

        Parameters
        ----------
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `~lsst.daf.butler.DatasetRef`, or a single
            `~lsst.daf.butler.DatasetRef`. The function will get and return
            the corresponding datasets from the butler. If `None` is passed
            in place of a `~lsst.daf.butler.DatasetRef` then the
            corresponding returned object will be `None`.

        Returns
        -------
        return : `object`
            Arbitrary objects fetched from the butler, structured to mirror
            the input argument: a dictionary keyed by the connection
            attribute names for an `InputQuantizedConnection`, a list of
            objects for a `list` of `~lsst.daf.butler.DatasetRef`, and a
            single object for a single `~lsst.daf.butler.DatasetRef`.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that
            is not defined in the quantum object.
        TypeError
            Raised if ``dataset`` is not one of the supported types.
        """
        # Periodic logger so progress messages appear if retrieval is slow.
        periodic = PeriodicLogger(_LOG)

        if isinstance(dataset, InputQuantizedConnection):
            results: dict[str, Any] = {}
            n_connections = len(dataset)
            n_retrieved = 0
            for conn_index, (name, ref) in enumerate(dataset):
                if isinstance(ref, (list, tuple)):
                    fetched = []
                    n_refs = len(ref)
                    for ref_index, single_ref in enumerate(ref):
                        fetched.append(self._get(single_ref))
                        n_retrieved += 1
                        periodic.log(
                            "Retrieved %d out of %d datasets for connection '%s' (%d out of %d)",
                            ref_index + 1,
                            n_refs,
                            name,
                            conn_index + 1,
                            n_connections,
                        )
                else:
                    fetched = self._get(ref)
                    periodic.log(
                        "Retrieved dataset for connection '%s' (%d out of %d)",
                        name,
                        conn_index + 1,
                        n_connections,
                    )
                    n_retrieved += 1
                results[name] = fetched
            if periodic.num_issued > 0:
                # Retrieval took long enough that periodic messages were
                # emitted, so close with a final confirmation message.
                _LOG.verbose(
                    "Completed retrieval of %d datasets from %d connections", n_retrieved, n_connections
                )
            return results
        elif isinstance(dataset, (list, tuple)):
            n_datasets = len(dataset)
            retrieved = []
            for index, element in enumerate(dataset):
                retrieved.append(self._get(element))
                periodic.log("Retrieved %d out of %d datasets", index + 1, n_datasets)
            if periodic.num_issued > 0:
                _LOG.verbose("Completed retrieval of %d datasets", n_datasets)
            return retrieved
        elif dataset is None or isinstance(dataset, (DatasetRef, DeferredDatasetRef)):
            return self._get(dataset)
        else:
            raise TypeError(
                f"Dataset argument ({get_full_type_name(dataset)}) is not a type that can be used to get"
            )

    def put(
        self,
        values: Struct | list[Any] | Any,
        dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef,
    ) -> None:
        """Put data into the butler.

        Parameters
        ----------
        values : `Struct` or `list` of `object` or `object`
            The data that should be put with the butler. If the type of the
            dataset is `OutputQuantizedConnection` then this argument should
            be a `Struct` with corresponding attribute names. Each attribute
            should then correspond to either a list of objects or a single
            object, matching the shape of the corresponding attribute on
            dataset. I.e. if ``dataset.calexp`` is
            ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be
            ``[calexp1, calexp2]``; a single ref takes a single object. The
            same restriction applies if dataset is directly a `list` of
            `~lsst.daf.butler.DatasetRef` or a single
            `~lsst.daf.butler.DatasetRef`.
        dataset
            This argument may either be an `InputQuantizedConnection` which
            describes all the inputs of a quantum, a list of
            `lsst.daf.butler.DatasetRef`, or a single
            `lsst.daf.butler.DatasetRef`. The corresponding datasets are
            stored with the butler.

        Raises
        ------
        ValueError
            Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that
            is not defined in the `~lsst.daf.butler.Quantum` object, or the
            type of values does not match what is expected from the type of
            dataset.
        """
        if isinstance(dataset, OutputQuantizedConnection):
            if not isinstance(values, Struct):
                raise ValueError(
                    "dataset is a OutputQuantizedConnection, a Struct with corresponding"
                    " attributes must be passed as the values to put"
                )
            for name, refs in dataset:
                attribute_value = getattr(values, name)
                if isinstance(refs, (list, tuple)):
                    if len(refs) != len(attribute_value):
                        raise ValueError(f"There must be a object to put for every Dataset ref in {name}")
                    for single_value, single_ref in zip(attribute_value, refs):
                        self._put(single_value, single_ref)
                else:
                    self._put(attribute_value, refs)
        elif isinstance(dataset, (list, tuple)):
            if not isinstance(values, Sequence):
                raise ValueError("Values to put must be a sequence")
            if len(dataset) != len(values):
                raise ValueError("There must be a common number of references and values to put")
            for single_value, single_ref in zip(values, dataset):
                self._put(single_value, single_ref)
        elif isinstance(dataset, DatasetRef):
            self._put(values, dataset)
        else:
            raise TypeError("Dataset argument is not a type that can be used to put")

    def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None:
        """Check if a `~lsst.daf.butler.DatasetRef` is part of the input
        `~lsst.daf.butler.Quantum`.

        This function will raise an exception if the `QuantumContext` is
        used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined
        in the quantum.

        Parameters
        ----------
        ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \
                `~lsst.daf.butler.DatasetRef`
            Either a `list` or a single `~lsst.daf.butler.DatasetRef` to
            check.
        inout : `set`
            The connection type to check, e.g. either an input or an output.
            This prevents both types needing to be checked for every
            operation, which may be important for Quanta with lots of
            `~lsst.daf.butler.DatasetRef`.
        """
        candidates = ref if isinstance(ref, (list, tuple)) else [ref]
        for candidate in candidates:
            if (candidate.datasetType, candidate.dataId) not in inout:
                raise ValueError("DatasetRef is not part of the Quantum being processed")

    @property
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self.__butler.dimensions

417 

418 

@deprecated(
    reason="ButlerQuantumContext has been renamed to QuantumContext and been given extra functionality. "
    "Please use the new name. Will be removed after v27.",
    version="v26",
    category=FutureWarning,
)
class ButlerQuantumContext(QuantumContext):
    """Deprecated version of `QuantumContext`."""