# python/lsst/ctrl/bps/parsl/job.py

import os
import re
import subprocess
from collections.abc import Sequence
from functools import partial
from textwrap import dedent
from typing import Any

from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_bps_config_value

__all__ = ("get_file_paths", "ParslJob")

_env_regex = re.compile(r"<ENV:(\S+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
_file_regex = re.compile(r"<FILE:(\S+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines


21def run_command( 

22 command_line: str, 

23 inputs: Sequence[Future] = (), 

24 stdout: str | None = None, 

25 stderr: str | None = None, 

26 parsl_resource_specification: dict[str, Any] | None = None, 

27) -> str: 

28 """Run a command 

29 

30 This function exists to get information into parsl, through the ``inputs``, 

31 ``stdout`` and ``stderr`` parameters. It needs to be wrapped by a parsl 

32 ``bash_app`` decorator before use, after which it will return a `Future`. 

33 

34 Parameters 

35 ---------- 

36 command_line : `str` 

37 Command-line to have parsl run. 

38 inputs : list of `Future` 

39 Other commands that must have run before this. 

40 stdout, stderr : `str`, optional 

41 Filenames for stdout and stderr. 

42 parsl_resource_specification : `dict`, optional 

43 Resources required for job. 

44 

45 Returns 

46 ------- 

47 command_line : `str` 

48 Command-line to have parsl run. 

49 """ 

50 return command_line 

51 

52 
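# Illustrative sketch, not part of this module: ``run_command`` only becomes
# useful once it is wrapped by a parsl ``bash_app`` decorator, as the
# docstring above notes. Assuming a parsl configuration has already been
# loaded (``some_config`` is a hypothetical ``parsl.Config``), the wrapping
# could be done by hand like this; in this package the wrapping happens in
# ``ParslJob.get_future`` using the ``app`` passed in by the caller.
#
#     import parsl
#     from parsl.app.app import bash_app
#
#     parsl.load(some_config)
#     echo_app = bash_app(run_command)
#     future = echo_app("echo hello", stdout="hello.stdout", stderr="hello.stderr")
#     future.result()  # Blocks until the command has finished
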

def get_file_paths(workflow: GenericWorkflow, name: str) -> dict[str, str]:
    """Extract file paths for a job

    Parameters
    ----------
    workflow : `GenericWorkflow`
        BPS workflow that knows the file paths.
    name : `str`
        Job name.

    Returns
    -------
    paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """
    return {ff.name: ff.src_uri for ff in workflow.get_job_inputs(name)}

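# Illustrative sketch with hypothetical names (not part of this module): for
# a ``GenericWorkflow`` produced by BPS, the returned mapping is keyed by the
# symbolic file name and gives the path on the shared file system, e.g.
#
#     paths = get_file_paths(workflow, "someJobName")
#     # paths might look like {"butlerConfig": "/path/to/submit/butler.yaml", ...}
#
# ``ParslJob.evaluate_command_line`` uses this mapping to resolve
# ``<FILE:...>`` placeholders in the job command-line.
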

class ParslJob:
    """Job to execute with parsl

    Parameters
    ----------
    generic : `GenericWorkflowJob`
        BPS job information.
    config : `BpsConfig`
        BPS configuration.
    file_paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """

    def __init__(
        self,
        generic: GenericWorkflowJob,
        config: BpsConfig,
        file_paths: dict[str, str],
    ):
        self.generic = generic
        self.name = generic.name
        self.config = config
        self.file_paths = file_paths
        self.future = None
        self.done = False
        log_dir = os.path.join(get_bps_config_value(self.config, "submitPath", str, required=True), "logs")
        self.stdout = os.path.join(log_dir, self.name + ".stdout")
        self.stderr = os.path.join(log_dir, self.name + ".stderr")

    def __reduce__(self):
        """Recipe for pickling"""
        return type(self), (self.generic, self.config, self.file_paths)

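    # Illustrative sketch (not part of this module) of what ``__reduce__``
    # buys us: only the constructor arguments survive pickling, so transient
    # state such as ``future`` and ``done`` is reset on the round trip.
    # ``job`` here is a hypothetical ``ParslJob`` whose ``generic``, ``config``
    # and ``file_paths`` are themselves picklable.
    #
    #     import pickle
    #
    #     job2 = pickle.loads(pickle.dumps(job))
    #     assert job2.name == job.name
    #     assert job2.future is None and not job2.done
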

    def get_command_line(self, allow_stage=True) -> str:
        """Get the bash command-line to run to execute this job

        Parameters
        ----------
        allow_stage : `bool`
            Allow staging of execution butler? This is not appropriate for the
            initial or final jobs that run on the local nodes.

        Returns
        -------
        command : `str`
            Command-line to execute for job.
        """
        command: str = self.generic.executable.src_uri + " " + self.generic.arguments
        if not allow_stage:
            return command
        exec_butler_dir = get_bps_config_value(self.config, "executionButlerDir", str)
        if not exec_butler_dir or not os.path.isdir(exec_butler_dir):
            # We're not using the execution butler
            return command

        # Add commands to copy the execution butler.
        # This keeps workers from overloading the sqlite database.
        # The copy can be deleted once we're done, because the original
        # execution butler contains everything that's required.
        job_dir = os.path.join(os.path.dirname(exec_butler_dir), self.name)
        # Set the butlerConfig field to the location of the job-specific copy.
        command = command.replace("<FILE:butlerConfig>", job_dir)
        return dedent(
            f"""
            if [[ ! -d {job_dir} ]]; then mkdir -p {job_dir}; fi
            cp {exec_butler_dir}/* {job_dir}
            {command}
            retcode=$?
            rm -rf {job_dir}
            exit $retcode
            """
        )


    def evaluate_command_line(self, command: str) -> str:
        """Evaluate the bash command-line

        BPS provides a command-line with symbolic names for BPS variables,
        environment variables and files. Here, we replace those symbolic names
        with the actual values, to provide a concrete command that can be
        executed.

        In replacing file paths, we are implicitly assuming that we are working
        on a shared file system, i.e., that workers can see the butler
        directory, and that files do not need to be staged to the worker.

        Parameters
        ----------
        command : `str`
            Command-line to execute, from BPS.

        Returns
        -------
        command : `str`
            Command ready for execution on a worker.
        """
        command = command.format(**self.generic.cmdvals)  # BPS variables

        # Make sure *all* symbolic names are resolved.
        #
        # In general, actual values for some symbolic names may contain other
        # symbolic names. As a result, more than one iteration may be required
        # to resolve all symbolic names. For example, an actual value for
        # a filename may contain a symbolic name for an environment variable.
        prev_command = command
        while True:
            command = re.sub(_env_regex, r"${\g<1>}", command)  # Environment variables
            command = re.sub(_file_regex, lambda match: self.file_paths[match.group(1)], command)  # Files
            if prev_command == command:
                break
            prev_command = command

        return command

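    # Illustrative worked example with hypothetical values (not part of this
    # module): with ``self.generic.cmdvals == {"qgraphFile": "<FILE:qgraphFile>"}``
    # and ``self.file_paths == {"qgraphFile": "/path/to/submit/job.qgraph"}``,
    # the BPS command-line
    #
    #     "pipetask run -g {qgraphFile} -b <ENV:REPO>"
    #
    # is formatted to "pipetask run -g <FILE:qgraphFile> -b <ENV:REPO>" and
    # then resolved by the loop above to
    #
    #     "pipetask run -g /path/to/submit/job.qgraph -b ${REPO}"
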

    def get_resources(self) -> dict[str, Any]:
        """Return what resources are required for executing this job"""
        resources = {}
        for bps_name, parsl_name, scale in (
            ("request_memory", "memory", None),  # Both BPS and WorkQueueExecutor use MB
            ("request_cpus", "cores", None),
            ("request_disk", "disk", None),  # Both are MB
            ("request_walltime", "running_time_min", None),  # Both are minutes
        ):
            value = getattr(self.generic, bps_name)
            if scale is not None:
                value *= scale
            resources[parsl_name] = value
        return resources

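    # Illustrative sketch with hypothetical numbers (not part of this module):
    # for a generic job with ``request_memory=2048``, ``request_cpus=1``,
    # ``request_disk=0`` and ``request_walltime=90``, this returns
    #
    #     {"memory": 2048, "cores": 1, "disk": 0, "running_time_min": 90}
    #
    # which is the ``parsl_resource_specification`` form used when submitting
    # to the WorkQueue executor (see ``get_future``).
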

    def get_future(
        self,
        app: BashApp,
        inputs: list[Future],
        command_prefix: str | None = None,
        add_resources: bool = False,
    ) -> Future | None:
        """Get the parsl app future for the job

        This effectively queues the job for execution by a worker, subject to
        dependencies.

        Parameters
        ----------
        app : callable
            A parsl bash_app decorator to use.
        inputs : list of `Future`
            Dependencies to be satisfied before executing this job.
        command_prefix : `str`, optional
            Bash commands to execute before the job command, e.g., for setting
            the environment.
        add_resources : `bool`
            Add resource specification when submitting the job? This is only
            appropriate for the ``WorkQueue`` executor; other executors will
            raise an exception.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None` if
            the job has already been done (e.g., by ``run_local``).
        """
        if self.done:
            return None  # Nothing to do
        if not self.future:
            command = self.get_command_line()
            command = self.evaluate_command_line(command)
            if command_prefix:
                command = command_prefix + "\n" + command
            resources = self.get_resources() if add_resources else None

            # Add a layer of indirection to which we can add a useful name.
            # This name is used by parsl for tracking workflow status.
            func = partial(run_command)
            setattr(func, "__name__", self.generic.label)

            self.future = app(func)(
                command,
                inputs=inputs,
                stdout=self.stdout,
                stderr=self.stderr,
                parsl_resource_specification=resources,
            )
        return self.future

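    # Illustrative sketch with hypothetical names (not part of this module):
    # a workflow layer might queue a job after its upstream dependencies like
    # this, where ``app`` is a parsl ``bash_app`` decorator and ``upstream``
    # is a list of ``ParslJob``s that must finish first.
    #
    #     deps = [dep.get_future(app, inputs=[]) for dep in upstream]
    #     future = job.get_future(
    #         app,
    #         inputs=[ff for ff in deps if ff is not None],
    #         command_prefix="source /path/to/setup.sh",  # Hypothetical setup script
    #         add_resources=False,
    #     )
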

    def run_local(self):
        """Run the command locally

        This is intended to support jobs that should not be done by a
        worker.
        """
        if self.done:  # Nothing to do
            return
        command = self.get_command_line(False)
        command = self.evaluate_command_line(command)
        with open(self.stdout, "w") as stdout, open(self.stderr, "w") as stderr:
            subprocess.check_call(command, shell=True, executable="/bin/bash", stdout=stdout, stderr=stderr)
        self.done = True