Coverage for python/lsst/ctrl/bps/parsl/job.py: 23%

80 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-09 09:52 +0000

1# This file is part of ctrl_bps_parsl. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28import os 

29import re 

30import subprocess 

31from collections.abc import Sequence 

32from functools import partial 

33from textwrap import dedent 

34from typing import Any 

35 

36from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob 

37from parsl.app.bash import BashApp 

38from parsl.app.futures import Future 

39 

40from .configuration import get_bps_config_value 

41 

42__all__ = ("get_file_paths", "ParslJob") 

43 

# Placeholders that BPS embeds in job command-lines.  The captured name
# excludes ">" as well as whitespace, so that adjacent placeholders with no
# whitespace between them (e.g. "<ENV:HOME>/<ENV:USER>") are matched one at a
# time instead of being swallowed as a single, bogus name.
_env_regex = re.compile(r"<ENV:([^\s>]+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
_file_regex = re.compile(r"<FILE:([^\s>]+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines

46 

47 

48def run_command( 

49 command_line: str, 

50 inputs: Sequence[Future] = (), 

51 stdout: str | None = None, 

52 stderr: str | None = None, 

53 parsl_resource_specification: dict[str, Any] | None = None, 

54) -> str: 

55 """Run a command 

56 

57 This function exists to get information into parsl, through the ``inputs``, 

58 ``stdout`` and ``stderr`` parameters. It needs to be wrapped by a parsl 

59 ``bash_app`` decorator before use, after which it will return a `Future`. 

60 

61 Parameters 

62 ---------- 

63 command_line : `str` 

64 Command-line to have parsl run. 

65 inputs : list of `Future` 

66 Other commands that must have run before this. 

67 stdout, stderr : `str`, optional 

68 Filenames for stdout and stderr. 

69 parsl_resource_specification : `dict`, optional 

70 Resources required for job. 

71 

72 Returns 

73 ------- 

74 command_line : `str` 

75 Command-line to have parsl run. 

76 """ 

77 return command_line 

78 

79 

def get_file_paths(workflow: GenericWorkflow, name: str) -> dict[str, str]:
    """Collect the input file paths of a job.

    Parameters
    ----------
    workflow : `GenericWorkflow`
        BPS workflow that is queried for the job's inputs.
    name : `str`
        Name of the job.

    Returns
    -------
    paths : `dict` mapping `str` to `str`
        The ``src_uri`` of each job input, keyed by the input's ``name``.
    """
    paths: dict[str, str] = {}
    for job_input in workflow.get_job_inputs(name):
        paths[job_input.name] = job_input.src_uri
    return paths

96 

97 

class ParslJob:
    """Job to execute with parsl.

    Parameters
    ----------
    generic : `GenericWorkflowJob`
        BPS job information.
    config : `BpsConfig`
        BPS configuration.
    file_paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """

    def __init__(
        self,
        generic: GenericWorkflowJob,
        config: BpsConfig,
        file_paths: dict[str, str],
    ):
        self.generic = generic
        self.name = generic.name
        self.config = config
        self.file_paths = file_paths
        self.future = None  # Set by get_future once the job has been queued
        self.done = False  # Set by run_local once the job has been run
        # Log files live in a "logs" directory under the configured submitPath.
        log_dir = os.path.join(get_bps_config_value(self.config, "submitPath", str, required=True), "logs")
        self.stdout = os.path.join(log_dir, self.name + ".stdout")
        self.stderr = os.path.join(log_dir, self.name + ".stderr")

    def __reduce__(self):
        """Recipe for pickling.

        Only the constructor arguments are recorded, so ``future`` and
        ``done`` are reset to their initial values on unpickling.
        """
        return type(self), (self.generic, self.config, self.file_paths)

    def get_command_line(self, allow_stage: bool = True) -> str:
        """Get the bash command-line to run to execute this job.

        Parameters
        ----------
        allow_stage : `bool`
            Allow staging of execution butler? This is not appropriate for the
            initial or final jobs that run on the local nodes.

        Returns
        -------
        command : `str`
            Command-line to execute for job.
        """
        command: str = self.generic.executable.src_uri + " " + self.generic.arguments
        if not allow_stage:
            return command
        exec_butler_dir = get_bps_config_value(self.config, "executionButlerDir", str)
        if not exec_butler_dir or not os.path.isdir(exec_butler_dir):
            # We're not using the execution butler
            return command

        # Add commands to copy the execution butler.
        # This keeps workers from overloading the sqlite database.
        # The copy can be deleted once we're done, because the original
        # execution butler contains everything that's required.
        job_dir = os.path.join(os.path.dirname(exec_butler_dir), self.name)
        # Set the butlerConfig field to the location of the job-specific copy.
        command = command.replace("<FILE:butlerConfig>", job_dir)
        # The job's exit status is saved before the clean-up so that the
        # script reports the status of the job rather than that of ``rm``.
        return dedent(
            f"""
            if [[ ! -d {job_dir} ]]; then mkdir -p {job_dir}; fi
            cp {exec_butler_dir}/* {job_dir}
            {command}
            retcode=$?
            rm -rf {job_dir}
            exit $retcode
            """
        )

    def evaluate_command_line(self, command: str) -> str:
        """Evaluate the bash command-line.

        BPS provides a command-line with symbolic names for BPS variables,
        environment variables and files. Here, we replace those symbolic names
        with the actual values, to provide a concrete command that can be
        executed.

        In replacing file paths, we are implicitly assuming that we are working
        on a shared file system, i.e., that workers can see the butler
        directory, and that files do not need to be staged to the worker.

        Parameters
        ----------
        command : `str`
            Command-line to execute, from BPS.

        Returns
        -------
        command : `str`
            Command ready for execution on a worker.

        Raises
        ------
        KeyError
            Raised if the command refers to a BPS variable that is not in the
            job's ``cmdvals``, or to a file that is not in ``file_paths``.
        """
        command = command.format(**self.generic.cmdvals)  # BPS variables

        # Make sure *all* symbolic names are resolved.
        #
        # In general, actual values for some symbolic names may contain other
        # symbolic names. As a result, more than one iteration may be required
        # to resolve all symbolic names. For example, an actual value for
        # a filename may contain a symbolic name for an environment variable.
        while True:
            resolved = _env_regex.sub(r"${\g<1>}", command)  # Environment variables
            resolved = _file_regex.sub(lambda match: self.file_paths[match.group(1)], resolved)  # Files
            if resolved == command:
                # A full pass changed nothing: everything is resolved.
                return command
            command = resolved

    def get_resources(self) -> dict[str, Any]:
        """Return what resources are required for executing this job.

        Returns
        -------
        resources : `dict` mapping `str` to `Any`
            Resource requests of the job, keyed by the parsl resource name.
            Every key is always present; its value is whatever the
            corresponding attribute of the BPS job holds.
        """
        resources = {}
        for bps_name, parsl_name, scale in (
            ("request_memory", "memory", None),  # Both BPS and WorkQueueExecutor use MB
            ("request_cpus", "cores", None),
            ("request_disk", "disk", None),  # Both are MB
            ("request_walltime", "running_time_min", None),  # Both are minutes
        ):
            value = getattr(self.generic, bps_name)
            # Don't attempt to scale a request that hasn't been set.
            if scale is not None and value is not None:
                value *= scale
            resources[parsl_name] = value
        return resources

    def get_future(
        self,
        app: BashApp,
        inputs: list[Future],
        command_prefix: str | None = None,
        add_resources: bool = False,
    ) -> Future | None:
        """Get the parsl app future for the job.

        This effectively queues the job for execution by a worker, subject to
        dependencies. The future is created on the first call only; later
        calls return the same object.

        Parameters
        ----------
        app : callable
            A parsl bash_app decorator to use.
        inputs : list of `Future`
            Dependencies to be satisfied before executing this job.
        command_prefix : `str`, optional
            Bash commands to execute before the job command, e.g., for setting
            the environment.
        add_resources : `bool`
            Add resource specification when submitting the job? This is only
            appropriate for the ``WorkQueue`` executor; other executors will
            raise an exception.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None` if
            the job has already been done (e.g., by ``run_local``).
        """
        if self.done:
            return None  # Nothing to do
        if self.future is None:
            command = self.get_command_line()
            command = self.evaluate_command_line(command)
            if command_prefix:
                command = command_prefix + "\n" + command
            resources = self.get_resources() if add_resources else None

            # Add a layer of indirection to which we can add a useful name.
            # This name is used by parsl for tracking workflow status.
            func = partial(run_command)
            setattr(func, "__name__", self.generic.label)

            self.future = app(func)(
                command,
                inputs=inputs,
                stdout=self.stdout,
                stderr=self.stderr,
                parsl_resource_specification=resources,
            )
        return self.future

    def run_local(self) -> None:
        """Run the command locally.

        This is intended to support jobs that should not be done by a
        worker. The command is run through ``/bin/bash`` with its output
        written to the job's stdout and stderr log files; the job is marked
        as done only if the command succeeds.

        Raises
        ------
        subprocess.CalledProcessError
            Raised if the command exits with a non-zero status.
        """
        if self.done:  # Nothing to do
            return
        command = self.get_command_line(False)
        command = self.evaluate_command_line(command)
        # Nothing in this class creates the log directory, so make sure it
        # exists before opening the log files.
        for log_path in (self.stdout, self.stderr):
            log_dir = os.path.dirname(log_path)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
        with open(self.stdout, "w") as stdout, open(self.stderr, "w") as stderr:
            subprocess.check_call(command, shell=True, executable="/bin/bash", stdout=stdout, stderr=stderr)
        self.done = True