Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 32%

56 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-11 09:04 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from collections.abc import Iterable, Iterator, Mapping 

27from typing import Any 

28 

29from lsst.daf.butler import Butler, CollectionType, Quantum 

30from lsst.pex.config import Config 

31from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef 

32 

33from .preExecInit import PreExecInit 

34from .singleQuantumExecutor import SingleQuantumExecutor 

35from .taskFactory import TaskFactory 

36 

37 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes.  It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple.  Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.GraphBuilder`, `PreExecInit`, and `SingleQuantumExecutor`
    directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        # Both objects are stored as-is; no consistency check between the
        # graph and the butler is performed here.
        self.quantum_graph = quantum_graph
        self.butler = butler

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first in the chain, followed by the inputs in
        # the order given by the caller.
        collections = [output_run, *inputs]
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        # Reject a config of the wrong class before it is wrapped in a
        # TaskDef.
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs : `~typing.Any`
            Additional keyword arguments.  These are accepted but are not
            used by this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        # Normalize either accepted input form into a concrete list of
        # TaskDef objects.
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        # The graph is built against the butler's output run, so one must be
        # set.  NOTE(review): ``assert`` is stripped under ``python -O``;
        # consider an explicit raise if this must always be enforced.
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        # Exhausting the iterator from `as_generator` is what executes every
        # quantum.
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)