Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 32%

54 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 00:11 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from typing import Any, Iterable, Iterator, List, Optional, Type, Union 

27 

28from lsst.daf.butler import Butler, CollectionType, Quantum 

29from lsst.pex.config import Config 

30from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef 

31 

32from .preExecInit import PreExecInit 

33from .singleQuantumExecutor import SingleQuantumExecutor 

34from .taskFactory import TaskFactory 

35 

36 

class SimplePipelineExecutor:
    """Minimal in-process driver that executes every quantum of a graph.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        The graph whose quanta will be executed.
    butler : `Butler`
        Object that performs all I/O.  It must have been initialized with
        `collections` and `run` properties matching the input and output
        collections that were used to build ``quantum_graph``.

    Notes
    -----
    Prefer the `classmethod` factories (`from_pipeline_filename`,
    `from_task_class`, `from_pipeline`) over calling the constructor
    directly; they build the `QuantumGraph` from the same `Butler` that is
    later used for execution, so the two are consistent by construction.

    The class exists mainly for unit tests and small integration tests of
    `PipelineTask` classes.  It intentionally omits many features of the
    command-line ``pipetask`` tool to stay simple.  Python callers that need
    more control should use `GraphBuilder`, `PreExecInit`, and
    `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        self.butler = butler
        self.quantum_graph = quantum_graph

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Create a `Butler` whose collections are set up for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; it must already exist and
            hold all necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for input datasets, in search order.
        output : `str`
            Name of a new `~CollectionType.CHAINED` collection to create; it
            chains the output run followed by ``inputs``.
        output_run : `str`, optional
            Name of the `~CollectionType.RUN` collection that will directly
            hold all output datasets.  When omitted, the name is
            ``output`` joined by ``/`` to the value returned by
            `Instrument.makeCollectionTimestamp`.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod`
            factories.  Always writeable.
        """
        run_name = output_run
        if run_name is None:
            run_name = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # The collections do not exist yet, so the first butler is built
        # without any.
        bootstrap = Butler(root, writeable=True)
        bootstrap.registry.registerCollection(run_name, CollectionType.RUN)
        bootstrap.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run comes first in the chain, followed by the inputs in
        # the order given.
        chain = [run_name, *inputs]
        bootstrap.registry.setCollectionChain(output, chain)
        # Build a second butler now that the collections exist, so that it
        # can infer default data IDs from them.
        return Butler(butler=bootstrap, collections=[output], run=run_name)

    @classmethod
    def from_pipeline_filename(
        cls, pipeline_filename: str, *, where: str = "", butler: Butler
    ) -> SimplePipelineExecutor:
        """Build an executor from a pipeline YAML file on disk.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file holding the pipeline definition.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance holding the constructed `QuantumGraph` and
            the given `Butler`, ready for `run` to be called.
        """
        return cls.from_pipeline(Pipeline.fromFile(pipeline_filename), butler=butler, where=where)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Build an executor for a pipeline that contains a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, a default-constructed
            ``task_class.ConfigClass`` is used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance holding the constructed `QuantumGraph` and
            the given `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        # Defaults are resolved before validation, in this order, so that a
        # default-constructed config passes through the same type check.
        config = task_class.ConfigClass() if config is None else config
        label = task_class._DefaultName if label is None else label
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        single_task_pipeline = [
            TaskDef(
                taskName=task_class.__name__,
                config=config,
                label=label,
                taskClass=task_class,
            )
        ]
        return cls.from_pipeline(single_task_pipeline, butler=butler, where=where)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Build an executor from a pipeline that is already in memory.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs
            Additional keyword arguments.  They are accepted but not used by
            this implementation.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance holding the constructed `QuantumGraph` and
            the given `Butler`, ready for `run` to be called.
        """
        # A `Pipeline` is expanded into its task definitions; any other
        # iterable is taken to already contain them.
        source = pipeline.toExpandedPipeline() if isinstance(pipeline, Pipeline) else pipeline
        task_defs = list(source)
        builder = GraphBuilder(butler.registry)
        graph = builder.makeGraph(
            task_defs,
            collections=butler.collections,
            run=butler.run,
            userQuery=where,
        )
        return cls(quantum_graph=graph, butler=butler)

    def run(self, register_dataset_types: bool = False) -> List[Quantum]:
        """Execute every quantum in the `QuantumGraph`, in topological order.

        This runs the whole graph in one call.  Use `as_generator` instead to
        execute the quanta one at a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        executed = self.as_generator(register_dataset_types=register_dataset_types)
        return list(executed)

    def as_generator(self, register_dataset_types: bool = False) -> Iterator[Quantum]:
        """Return an iterator that executes quanta lazily, in topological
        order.

        Each quantum is run as the returned iterator is advanced, which makes
        it possible to execute the graph one quantum at a time.  Use `run` to
        execute everything in a single call.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        factory = TaskFactory()
        initializer = PreExecInit(self.butler, factory)
        initializer.initialize(
            graph=self.quantum_graph,
            registerDatasetTypes=register_dataset_types,
        )
        executor = SingleQuantumExecutor(factory)
        # This method must stay an ordinary function that returns a generator
        # expression.  Turning it into a generator function (``yield``) would
        # defer the PreExecInit work above until the first quantum is
        # requested, and callers may want to inspect the repository between
        # initialization and execution.
        return (
            executor.execute(node.taskDef, node.quantum, self.butler)
            for node in self.quantum_graph
        )