Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 41%

66 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-26 09:59 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("SimplePipelineExecutor",) 

31 

32import warnings 

33from collections.abc import Iterable, Iterator, Mapping 

34from typing import Any 

35 

36from lsst.daf.butler import Butler, CollectionType, Quantum 

37from lsst.pex.config import Config 

38from lsst.pipe.base import ( 

39 ExecutionResources, 

40 Instrument, 

41 Pipeline, 

42 PipelineGraph, 

43 PipelineTask, 

44 QuantumGraph, 

45 TaskDef, 

46) 

47from lsst.pipe.base.all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder 

48from lsst.utils.introspection import find_outside_stacklevel 

49 

50from .preExecInit import PreExecInit 

51from .singleQuantumExecutor import SingleQuantumExecutor 

52from .taskFactory import TaskFactory 

53 

54 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O. Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`
        The resources available to each quantum being executed.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes. It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple. Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.quantum_graph_builder.QuantumGraphBuilder`, `PreExecInit`,
    and `SingleQuantumExecutor` directly.
    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        # Graph of quanta to execute; iterated directly in `as_generator`.
        self.quantum_graph = quantum_graph
        # Butler used for all I/O; its run/collections must be consistent
        # with those used to build ``quantum_graph`` (see class docstring).
        self.butler = butler
        # Per-quantum execution resources; ``None`` lets downstream code
        # apply its defaults.
        self.resources = resources

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `~collections.abc.Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
            collection to create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
            directly hold all output datasets. If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod`
            factories. Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler.from_config(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run comes first in the chain so new outputs shadow any
        # same-named datasets in the input collections.
        collections = [output_run]
        collections.extend(inputs)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler.from_config(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constraints the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        # Delegate to `from_pipeline` after loading; keeps graph-building
        # logic in one place.
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `~lsst.pex.config.Config`, optional
            Configuration for the task. If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constraints the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        # Guard against passing a config for a different task; checked even
        # though the type annotation already suggests the right class.
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        # Wrap the single task in a one-node pipeline graph and delegate.
        pipeline_graph = PipelineGraph()
        pipeline_graph.add_task(label=label, task_class=task_class, config=config)
        return cls.from_pipeline_graph(
            pipeline_graph, butler=butler, where=where, bind=bind, resources=resources
        )

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration. Passing `~lsst.pipe.base.TaskDef`
            objects is deprecated and will not be supported after v27.
        where : `str`, optional
            Data ID query expression that constraints the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        if isinstance(pipeline, Pipeline):
            pipeline_graph = pipeline.to_graph()
        else:
            # TODO: disable this block and adjust docs and annotations
            # on DM-40443.
            warnings.warn(
                "Passing TaskDefs to SimplePipelineExecutor.from_pipeline is deprecated "
                "and will be removed after v27.",
                category=FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.ctrl.mpexec"),
            )
            # Convert the deprecated TaskDef iterable into a PipelineGraph,
            # preserving each task's label, class, config, and connections.
            pipeline_graph = PipelineGraph()
            for task_def in pipeline:
                pipeline_graph.add_task(
                    task_def.label, task_def.taskClass, task_def.config, connections=task_def.connections
                )
        return cls.from_pipeline_graph(
            pipeline_graph, where=where, bind=bind, butler=butler, resources=resources
        )

    @classmethod
    def from_pipeline_graph(
        cls,
        pipeline_graph: PipelineGraph,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline graph.

        Parameters
        ----------
        pipeline_graph : `~lsst.pipe.base.PipelineGraph`
            A Python object describing the tasks to run, along with their
            labels and configuration, in graph form. Will be resolved against
            the given ``butler``, with any existing resolutions ignored.
        where : `str`, optional
            Data ID query expression that constraints the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one. Must have its `~Butler.run` and `~Butler.collections` not
            empty and not `None`.
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        quantum_graph_builder = AllDimensionsQuantumGraphBuilder(
            pipeline_graph, butler, where=where, bind=bind
        )
        # Datastore records are not attached because this executor always
        # executes with direct butler access.
        quantum_graph = quantum_graph_builder.build(attach_datastore_records=False)
        return cls(quantum_graph=quantum_graph, butler=butler, resources=resources)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph. Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        # Exhausting the generator executes every quantum eagerly.
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over. Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory, resources=self.resources)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quanta is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (
            single_quantum_executor.execute(qnode.task_node, qnode.quantum) for qnode in self.quantum_graph
        )