Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 32%

56 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 10:22 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SimplePipelineExecutor",) 

25 

26from collections.abc import Iterable, Iterator, Mapping 

27from typing import Any, List, Optional, Type, Union 

28 

29from lsst.daf.butler import Butler, CollectionType, Quantum 

30from lsst.pex.config import Config 

31from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef 

32 

33from .preExecInit import PreExecInit 

34from .singleQuantumExecutor import SingleQuantumExecutor 

35from .taskFactory import TaskFactory 

36 

37 

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes.  It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple.  Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        self.quantum_graph = quantum_graph
        self.butler = butler

    @staticmethod
    def _collection_chain(output_run: str, inputs: Iterable[str]) -> List[str]:
        """Return the ordered children of the output chained collection.

        Parameters
        ----------
        output_run : `str`
            Name of the output run; always placed first in the chain.
        inputs : `Iterable` [ `str` ] or `str`
            Input collection names, in search order.  A bare `str` is treated
            as a single collection name rather than being iterated character
            by character.

        Returns
        -------
        chain : `list` [ `str` ]
            ``output_run`` followed by the input collections.
        """
        if isinstance(inputs, str):
            # A lone string is itself an iterable of 1-character strings;
            # wrap it so it is not split into bogus collection names.
            inputs = (inputs,)
        return [output_run, *inputs]

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
            A single collection name passed as a plain `str` is accepted and
            treated as a one-element sequence.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first in the chain, followed by the inputs in
        # the order given.
        butler.registry.setCollectionChain(output, cls._collection_chain(output_run, inputs))
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs
            Additional keyword arguments.  They are accepted but not used by
            this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        # NOTE: this check is stripped when Python runs with ``-O``; it is
        # kept as an assertion so the exception type seen by callers does not
        # change.
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (
            single_quantum_executor.execute(qnode.taskDef, qnode.quantum)
            for qnode in self.quantum_graph
        )