Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 33%

55 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-07 00:46 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from collections.abc import Iterable, Iterator, Mapping 

27from typing import Any, List, Optional, Type, Union 

28 

29from lsst.daf.butler import Butler, CollectionType, Quantum 

30from lsst.pex.config import Config 

31from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef 

32 

33from .preExecInit import PreExecInit 

34from .singleQuantumExecutor import SingleQuantumExecutor 

35from .taskFactory import TaskFactory 

36 

37 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes.  It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple.  Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        self.quantum_graph = quantum_graph
        self.butler = butler

    @staticmethod
    def _make_collection_chain(output_run: str, inputs: Iterable[str]) -> List[str]:
        """Build the ordered child list for the output chained collection.

        Parameters
        ----------
        output_run : `str`
            Name of the output run; always placed first in the chain.
        inputs : `Iterable` [ `str` ] or `str`
            Input collection names, in search order.  A bare `str` is treated
            as a single collection name.

        Returns
        -------
        chain : `list` [ `str` ]
            ``output_run`` followed by every input collection.
        """
        # A str is itself an iterable of one-character strings; without this
        # guard "a/b" would be expanded into the collections "a", "/", "b".
        if isinstance(inputs, str):
            inputs = [inputs]
        return [output_run, *inputs]

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
            A single `str` is accepted and treated as one collection name.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first so that it is searched before the inputs.
        collections = cls._make_collection_chain(output_run, inputs)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs
            Accepted but not used by this method; any extra keyword arguments
            are ignored.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        # Normalize both accepted input forms to a concrete list of TaskDef.
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        # Exhausting the iterator is what actually executes every quantum.
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)
322 # which might be useful for callers who want to check the state of the 

323 # repo in between. 

324 return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)