Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 43%

50 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-28 03:02 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28 

29from __future__ import annotations 

30 

31__all__ = [ 

32 "SeparablePipelineExecutor", 

33] 

34 

35 

36import datetime 

37import getpass 

38import logging 

39from collections.abc import Iterable, Mapping 

40from typing import Any, Protocol 

41 

42import lsst.pipe.base 

43import lsst.resources 

44from lsst.daf.butler import Butler 

45 

46from .mpGraphExecutor import MPGraphExecutor 

47from .preExecInit import PreExecInit 

48from .quantumGraphExecutor import QuantumGraphExecutor 

49from .singleQuantumExecutor import SingleQuantumExecutor 

50from .taskFactory import TaskFactory 

51 

52_LOG = logging.getLogger(__name__) 

53 

54 

55# Only way to keep black, flake8, and mypy all happy 

56_dqc = lsst.pipe.base._datasetQueryConstraints 

57 

58 

class _GraphBuilderLike(Protocol):
    """Structural type for objects able to build a quantum graph.

    Any object exposing a compatible ``makeGraph`` method satisfies this
    protocol. It is used to annotate the ``builder`` argument of
    `SeparablePipelineExecutor.make_quantum_graph`.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph for a pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline` or iterable of task definitions
            The pipeline to build a graph for.
        collections : `~typing.Any`
            The input collections to search for datasets.
        run : `str`
            The name of the output run.
        userQuery : `str` or `None`
            A user-supplied query expression, if any.
        datasetQueryConstraint : ``DatasetQueryConstraintVariant``, optional
            Constraint on dataset queries; defaults to the ``_ALL`` variant.
        metadata : `~collections.abc.Mapping` [`str`, `~typing.Any`], optional
            Extra metadata to pass to the graph builder.
        bind : `~collections.abc.Mapping` [`str`, `~typing.Any`], optional
            Presumably identifier bindings for ``userQuery`` -- confirm
            against the concrete builder implementation.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The generated quantum graph.
        """
        ...

71 

72 

class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Raises
    ------
    ValueError
        Raised if the butler does not specify input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
        resources: lsst.pipe.base.ExecutionResources | None = None,
    ):
        # Build this executor's own Butler from the caller's one, carrying
        # over its input collections and output run.
        self._butler = Butler.from_config(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        # Materialize the iterable once so it can be reused by every step.
        self._skip_existing_in = [] if skip_existing_in is None else list(skip_existing_in)

        # Compare against None explicitly: a caller-supplied factory must be
        # honored even if it happens to evaluate as falsy.
        self._task_factory = TaskFactory() if task_factory is None else task_factory
        self.resources = resources

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. By default, a new
            instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default builder when none was supplied; a
        # truthiness test would discard a valid builder that evaluates falsy.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance passed to the builder alongside the graph request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # The constructor already rejects a missing run; this assertion only
        # narrows the optional type for static checkers.
        assert self._butler.run is not None, "Butler output run collection must be defined"
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
        num_proc: int = 1,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all execution if any task fails when
            running with multiple processes. Only used with the default graph
            executor.
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        num_proc : `int`, optional
            The number of processes that can be used to run the pipeline. The
            default value ensures that no subprocess is created. Only used with
            the default graph executor.
        """
        # Only build the default executor when none was supplied; a
        # truthiness test would discard a valid executor that evaluates falsy.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
                resources=self.resources,
            )
            graph_executor = MPGraphExecutor(
                numProc=num_proc,
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)