Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 39% of 57 statements (coverage.py v7.4.1, created at 2024-02-07 12:18 +0000)

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SimplePipelineExecutor",)

from collections.abc import Iterable, Iterator, Mapping
from typing import Any

from lsst.daf.butler import Butler, CollectionType, Quantum
from lsst.pex.config import Config
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineTask,
    QuantumGraph,
    TaskDef,
)

from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory


class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O. Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes. It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple. Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.GraphBuilder`, `PreExecInit`, and `SingleQuantumExecutor`
    directly.
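
    Examples
    --------
    A minimal end-to-end sketch; the repository root, collection names,
    pipeline filename, and query below are placeholders, not working
    values::

        butler = SimplePipelineExecutor.prep_butler(
            "/path/to/repo",
            inputs=["HSC/defaults"],
            output="u/someone/demo",
        )
        executor = SimplePipelineExecutor.from_pipeline_filename(
            "my_pipeline.yaml",
            where="instrument = 'HSC' AND visit = 12345",
            butler=butler,
        )
        quanta = executor.run(register_dataset_types=True)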

    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        self.quantum_graph = quantum_graph
        self.butler = butler
        self.resources = resources

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return a configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `~collections.abc.Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
            collection to create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
            directly hold all output datasets. If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
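
        Examples
        --------
        A minimal sketch; the repository root and collection names are
        placeholders::

            butler = SimplePipelineExecutor.prep_butler(
                "/path/to/repo",
                inputs=["HSC/defaults"],
                output="u/someone/demo",
            )

        The returned butler reads from the new
        `~lsst.daf.butler.CollectionType.CHAINED` collection (which chains the
        output `~lsst.daf.butler.CollectionType.RUN` collection ahead of the
        inputs) and writes to that run collection.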

        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler.from_config(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        collections = [output_run]
        collections.extend(inputs)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler.from_config(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `~lsst.pex.config.Config`, optional
            Configuration for the task. If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
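
        Examples
        --------
        A minimal sketch, assuming ``MyTask`` is a concrete (hypothetical)
        `~lsst.pipe.base.PipelineTask` subclass and ``butler`` was created
        with `prep_butler`::

            config = MyTask.ConfigClass()
            config.someOption = 42  # hypothetical field on the hypothetical config
            executor = SimplePipelineExecutor.from_task_class(
                MyTask,
                config=config,
                butler=butler,
            )
            quanta = executor.run(register_dataset_types=True)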

        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.
        **kwargs : `~typing.Any`
            Unused.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler, resources=resources)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph. Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over. Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
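
        Examples
        --------
        A minimal sketch, assuming ``executor`` was created with one of the
        factory methods::

            generator = executor.as_generator(register_dataset_types=True)
            for quantum in generator:
                # Each quantum has now been executed; the repository can be
                # inspected here before the next one runs.
                print(quantum.dataId)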

        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory, resources=self.resources)
        # It is important that this returns a generator expression rather than
        # being a generator itself; that is what makes the PreExecInit work
        # above happen immediately instead of when the first quantum is
        # executed, which might be useful for callers who want to check the
        # state of the repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)