Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 39%

57 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-09-01 09:30 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from collections.abc import Iterable, Iterator, Mapping 

27from typing import Any 

28 

29from lsst.daf.butler import Butler, CollectionType, Quantum 

30from lsst.pex.config import Config 

31from lsst.pipe.base import ( 

32 ExecutionResources, 

33 GraphBuilder, 

34 Instrument, 

35 Pipeline, 

36 PipelineTask, 

37 QuantumGraph, 

38 TaskDef, 

39) 

40 

41from .preExecInit import PreExecInit 

42from .singleQuantumExecutor import SingleQuantumExecutor 

43from .taskFactory import TaskFactory 

44 

45 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes.  It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple.  Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.GraphBuilder`, `PreExecInit`, and `SingleQuantumExecutor`
    directly.
    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        self.quantum_graph = quantum_graph
        self.butler = butler
        self.resources = resources

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `~collections.abc.Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
            collection to create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
            directly hold all output datasets.  If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first in the chain, followed by the inputs in
        # the order given.
        collections = [output_run]
        collections.extend(inputs)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `~lsst.pex.config.Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        # NOTE(review): ``taskName`` is the bare class name here, not a
        # module-qualified path -- confirm that is what `TaskDef` expects.
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.
        **kwargs : `~typing.Any`
            Additional keyword arguments are accepted but are not used by
            this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.

        Raises
        ------
        AssertionError
            Raised if ``butler.run`` is `None`.
        """
        # Validate the output run before doing any work.  This is an explicit
        # raise rather than an ``assert`` statement so that the check is not
        # stripped when Python runs with ``-O``; AssertionError is kept so
        # that callers see the same exception type as before.
        output_run = butler.run
        if output_run is None:
            raise AssertionError("Butler output run collection must be defined")
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=output_run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler, resources=resources)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory, resources=self.resources)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)