Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 35%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

55 statements  

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from typing import Any, Iterable, Iterator, Optional, Type, Union, List 

27 

28from lsst.daf.butler import Butler, CollectionType, Quantum 

29from lsst.obs.base import Instrument 

30from lsst.pex.config import Config 

31from lsst.pipe.base import GraphBuilder, Pipeline, PipelineTask, QuantumGraph, TaskDef 

32 

33from .preExecInit import PreExecInit 

34from .singleQuantumExecutor import SingleQuantumExecutor 

35from .taskFactory import TaskFactory 

36 

37 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes.  It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple.  Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        self.quantum_graph = quantum_graph
        self.butler = butler

    @staticmethod
    def _chain_members(inputs: Union[str, Iterable[str]], output_run: str) -> List[str]:
        """Return the ordered children of the output chained collection.

        Parameters
        ----------
        inputs : `str` or `Iterable` [ `str` ]
            Input collection name(s), in search order.
        output_run : `str`
            Name of the output run, appended after all inputs.

        Returns
        -------
        members : `list` [ `str` ]
            The input collections followed by ``output_run``.
        """
        # A bare string is itself an iterable of one-character strings; treat
        # it as a single collection name instead of splitting it apart.
        members = [inputs] if isinstance(inputs, str) else list(inputs)
        members.append(output_run)
        return members

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Union[str, Iterable[str]],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ] or `str`
            Collections to search for all input datasets, in search order.
            A single collection name is also accepted.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        butler.registry.setCollectionChain(output, cls._chain_members(inputs, output_run))
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls, pipeline_filename: str, *, where: str = "", butler: Butler
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs
            Additional keyword arguments.  These are accepted but not used by
            this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(self.as_generator(register_dataset_types=register_dataset_types))

    def as_generator(self, register_dataset_types: bool = False) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(graph=self.quantum_graph, registerDatasetTypes=register_dataset_types)
        single_quantum_executor = SingleQuantumExecutor(task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (
            single_quantum_executor.execute(qnode.taskDef, qnode.quantum, self.butler)
            for qnode in self.quantum_graph
        )