Coverage for python/lsst/ctrl/bps/parsl/workflow.py: 24%

105 statements

coverage.py v7.4.1, created at 2024-02-15 11:23 +0000

# This file is part of ctrl_bps_parsl.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


import logging
import os
import pickle
from collections.abc import Iterable, Mapping

import parsl
import parsl.config
from lsst.ctrl.bps import BaseWmsWorkflow, BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.app import bash_app
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_workflow_filename, set_parsl_logging
from .job import ParslJob, get_file_paths
from .site import SiteConfig

__all__ = ("ParslWorkflow", "get_parsl_config")

_log = logging.getLogger("lsst.ctrl.bps.parsl")



def get_parsl_config(config: BpsConfig) -> parsl.config.Config:
    """Construct a parsl configuration from the BPS configuration.

    For details on the site configuration, see `SiteConfig`. For details on
    the monitor configuration, see ``get_parsl_monitor``.

    `SiteConfig` provides an implementation of the ``get_parsl_config``
    method that returns a parsl configuration with sensible defaults.
    Subclasses of `SiteConfig` can override that method to configure parsl
    in a site-specific way.

    Parameters
    ----------
    config : `BpsConfig`
        BPS configuration.

    Returns
    -------
    parsl_config : `parsl.config.Config`
        Parsl configuration.
    """
    site = SiteConfig.from_config(config)
    return site.get_parsl_config()


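# A minimal usage sketch, assuming a BPS submit YAML whose site section names
# a SiteConfig subclass; the file name "submit.yaml" is illustrative only:
#
#     from lsst.ctrl.bps import BpsConfig
#
#     bps_config = BpsConfig("submit.yaml")
#     parsl_config = get_parsl_config(bps_config)
#     dfk = parsl.load(parsl_config)

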

class ParslWorkflow(BaseWmsWorkflow):
    """Parsl-based workflow object to manage execution of a workflow.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    path : `str`
        Path prefix for workflow output files.
    jobs : `dict` mapping `str` to `ParslJob`
        Jobs to be executed.
    parents : `dict` mapping `str` to iterable of `str`
        Dependency tree. Keys are job names, and values are the names of the
        jobs that must be executed before the keyed job can be executed.
    endpoints : iterable of `str`
        Endpoints of the dependency tree. These jobs (specified by name) have
        no children.
    final : `ParslJob`, optional
        Final job to be done, e.g., to merge the execution butler. This is
        done locally.
    """


    def __init__(
        self,
        name: str,
        config: BpsConfig,
        path: str,
        jobs: dict[str, ParslJob],
        parents: Mapping[str, Iterable[str]],
        endpoints: Iterable[str],
        final: ParslJob | None = None,
    ):
        super().__init__(name, config)

        self.path = path
        self.bps_config = config
        self.parsl_config = get_parsl_config(config)
        self.site = SiteConfig.from_config(config)
        self.dfk: parsl.DataFlowKernel | None = None  # type: ignore
        self.command_prefix = self.site.get_command_prefix()

        # These are function decorators, one per executor, used to wrap job
        # command lines as parsl bash apps.
        self.apps: dict[str, BashApp] = {
            ex.label: bash_app(  # type: ignore
                executors=[ex.label], cache=True, ignore_for_cache=["stderr", "stdout"]
            )
            for ex in self.parsl_config.executors
        }

        self.jobs = jobs
        self.parents = parents
        self.endpoints = endpoints
        self.final = final

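    # Note: the parsl state created in __init__ (the DataFlowKernel and the
    # bash_app decorators) is not picklable, so __reduce__ records only the
    # constructor arguments; everything else is rebuilt on unpickling.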

    def __reduce__(self):
        """Recipe for pickle."""
        return type(self), (
            self.name,
            self.bps_config,
            self.path,
            self.jobs,
            self.parents,
            self.endpoints,
            self.final,
        )


    @classmethod
    def from_generic_workflow(
        cls, config: BpsConfig, generic_workflow: GenericWorkflow, out_prefix: str, service_class: str
    ) -> BaseWmsWorkflow:
        """Create a ParslWorkflow object from a BPS GenericWorkflow.

        Parameters
        ----------
        config : `BpsConfig`
            Configuration of the workflow.
        generic_workflow : `lsst.ctrl.bps.generic_workflow.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`
            Prefix for workflow output files.
        service_class : `str`
            Full module name of the WMS service class that created this
            workflow.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        # Generate the list of jobs
        jobs: dict[str, ParslJob] = {}
        for job_name in generic_workflow:
            generic_job = generic_workflow.get_job(job_name)
            assert generic_job.name not in jobs
            jobs[job_name] = ParslJob(generic_job, config, get_file_paths(generic_workflow, job_name))

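        # Invert the DAG edges into a parent map and collect the jobs with no
        # children. For an illustrative three-job DAG with edges A -> B and
        # A -> C, this yields parents == {"A": set(), "B": {"A"}, "C": {"A"}}
        # and endpoints == ["B", "C"]; execute() later walks the parent map
        # recursively from each endpoint.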

        parents = {name: set(generic_workflow.predecessors(name)) for name in jobs}
        endpoints = [name for name in jobs if generic_workflow.out_degree(name) == 0]

        # Add the final job: the execution butler merge
        job = generic_workflow.get_final()
        final: ParslJob | None = None
        if job is not None:
            assert isinstance(job, GenericWorkflowJob)
            final = ParslJob(job, config, get_file_paths(generic_workflow, job.name))

        return cls(generic_workflow.name, config, out_prefix, jobs, parents, endpoints, final)


    def write(self, out_prefix: str):
        """Write workflow state.

        This, in combination with the parsl checkpoint files, can be used to
        restart a workflow that was interrupted.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        """
        filename = get_workflow_filename(out_prefix)
        _log.info("Writing workflow with ID=%s", out_prefix)
        with open(filename, "wb") as fd:
            pickle.dump(self, fd)


    @classmethod
    def read(cls, out_prefix: str) -> "ParslWorkflow":
        """Construct from the saved workflow state.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        filename = get_workflow_filename(out_prefix)
        with open(filename, "rb") as fd:
            self = pickle.load(fd)
        assert isinstance(self, cls)
        return self


    def run(self, block: bool = True) -> list[Future | None]:
        """Run the workflow.

        Parameters
        ----------
        block : `bool`, optional
            Block returning from this method until the workflow is complete?
            If `False`, jobs may still be running when this returns, and it
            is the user's responsibility to call the ``finalize_jobs`` and
            ``shutdown`` methods when they are complete.

        Returns
        -------
        futures : `list` of `Future`
            `Future` objects linked to the execution of the endpoint jobs.
        """
        futures = [self.execute(name) for name in self.endpoints]
        if block:
            # Calling .exception() on each future blocks returning from this
            # method until all the jobs have executed or raised an error.
            # This is needed for running in a non-interactive python process
            # that would otherwise end before the futures resolve.
            for ff in futures:
                if ff is not None:
                    ff.exception()
            self.shutdown()
            self.finalize_jobs()
        return futures
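
    # A hedged sketch of the non-blocking path described above, where the
    # caller takes over the bookkeeping (``workflow`` is assumed to be a
    # started ParslWorkflow):
    #
    #     import concurrent.futures
    #
    #     futures = workflow.run(block=False)
    #     concurrent.futures.wait([ff for ff in futures if ff is not None])
    #     workflow.shutdown()
    #     workflow.finalize_jobs()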


    def execute(self, name: str) -> Future | None:
        """Execute a job.

        Parameters
        ----------
        name : `str`
            Name of job to execute.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None`
            if the job is being reserved to run locally.
        """
        if name in ("pipetaskInit", "mergeExecutionButler"):
            # These get done outside of parsl
            return None
        job = self.jobs[name]
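        # Recurse through the parents; repeated visits to a shared ancestor
        # are expected to be cheap because ParslJob.get_future caches and
        # returns the same future on subsequent calls (see job.py).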

        inputs = [self.execute(parent) for parent in self.parents[name]]
        executors = self.parsl_config.executors
        if len(executors) > 1:
            label = self.site.select_executor(job)
        else:
            label = executors[0].label
        return job.get_future(
            self.apps[label],
            [ff for ff in inputs if ff is not None],
            self.command_prefix,
            self.site.add_resources,
        )


    def load_dfk(self):
        """Load the parsl data flow kernel (DFK).

        This starts parsl.
        """
        if self.dfk is not None:
            raise RuntimeError("Workflow has already started.")
        set_parsl_logging(self.bps_config)
        self.dfk = parsl.load(self.parsl_config)


    def start(self):
        """Start the workflow."""
        self.initialize_jobs()
        self.load_dfk()

    def restart(self):
        """Restart the workflow after interruption."""
        self.parsl_config.checkpoint_files = parsl.utils.get_last_checkpoint()
        self.load_dfk()

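    # A hedged sketch of resuming an interrupted run, assuming the pickled
    # state was written by ``write`` under ``out_prefix``:
    #
    #     workflow = ParslWorkflow.read(out_prefix)
    #     workflow.restart()  # re-uses the last parsl checkpoint
    #     workflow.run(block=True)
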

    def shutdown(self):
        """Shut down the workflow.

        This stops parsl.
        """
        if self.dfk is None:
            raise RuntimeError("Workflow not started.")
        self.dfk.cleanup()
        self.dfk = None
        # Clear the global kernel so that parsl can be loaded again in this
        # process.
        parsl.DataFlowKernelLoader.clear()


    def initialize_jobs(self):
        """Run initial jobs.

        These jobs are run locally before any other jobs are submitted to
        parsl.

        This is used to set up the butler.
        """
        job = self.jobs.get("pipetaskInit", None)
        if job is not None:
            # exist_ok guards against the directory surviving a previous,
            # interrupted submission.
            os.makedirs(os.path.join(self.path, "logs"), exist_ok=True)
            job.run_local()


    def finalize_jobs(self):
        """Run final jobs.

        These jobs are run locally after all other jobs are complete.

        This is used to merge the execution butler.
        """
        if self.final is not None and not self.final.done:
            self.final.run_local()
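

# A hedged end-to-end sketch of the submission lifecycle (the BpsConfig,
# GenericWorkflow, and out_prefix values are assumed to come from the BPS
# submission machinery; ``service_class`` is illustrative):
#
#     workflow = ParslWorkflow.from_generic_workflow(
#         config, generic_workflow, out_prefix, service_class
#     )
#     workflow.write(out_prefix)  # enables ParslWorkflow.read + restart later
#     workflow.start()            # runs pipetaskInit locally, then loads parsl
#     workflow.run(block=True)    # executes the DAG, then shuts down parsl
#                                 # and merges the execution butler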