Coverage for python/lsst/ctrl/bps/parsl/job.py: 23% (80 statements)
coverage.py v7.3.0, created at 2023-08-31 09:55 +0000
import os
import re
import subprocess
from collections.abc import Sequence
from functools import partial
from textwrap import dedent
from typing import Any

from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_bps_config_value

__all__ = ("get_file_paths", "ParslJob")

_env_regex = re.compile(r"<ENV:(\S+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
_file_regex = re.compile(r"<FILE:(\S+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines


def run_command(
    command_line: str,
    inputs: Sequence[Future] = (),
    stdout: str | None = None,
    stderr: str | None = None,
    parsl_resource_specification: dict[str, Any] | None = None,
) -> str:
    """Run a command

    This function exists to get information into parsl, through the ``inputs``,
    ``stdout`` and ``stderr`` parameters. It needs to be wrapped by a parsl
    ``bash_app`` decorator before use, after which it will return a `Future`.

    Parameters
    ----------
    command_line : `str`
        Command-line to have parsl run.
    inputs : list of `Future`
        Other commands that must have run before this.
    stdout, stderr : `str`, optional
        Filenames for stdout and stderr.
    parsl_resource_specification : `dict`, optional
        Resources required for job.

    Returns
    -------
    command_line : `str`
        Command-line to have parsl run.
    """
    return command_line
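
# A minimal usage sketch (not part of this module; the executor label and
# command are assumptions, and a parsl configuration must already be loaded):
# ``run_command`` only does something useful once wrapped by parsl's
# ``bash_app`` decorator, e.g.
#
#     from parsl import bash_app
#
#     app = bash_app(executors=["work_queue"])(run_command)
#     future = app("echo hello", stdout="hello.stdout", stderr="hello.stderr")
#     future.result()  # blocks until the shell command has run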


def get_file_paths(workflow: GenericWorkflow, name: str) -> dict[str, str]:
    """Extract file paths for a job

    Parameters
    ----------
    workflow : `GenericWorkflow`
        BPS workflow that knows the file paths.
    name : `str`
        Job name.

    Returns
    -------
    paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """
    return {ff.name: ff.src_uri for ff in workflow.get_job_inputs(name)}
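
# For illustration only (hypothetical symbolic names and paths), the returned
# mapping looks like:
#
#     {"butlerConfig": "/repo/submit/u/user/pipeline/butler.yaml",
#      "runQgraphFile": "/repo/submit/u/user/pipeline/run.qgraph"}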


class ParslJob:
    """Job to execute with parsl

    Parameters
    ----------
    generic : `GenericWorkflowJob`
        BPS job information.
    config : `BpsConfig`
        BPS configuration.
    file_paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """

    def __init__(
        self,
        generic: GenericWorkflowJob,
        config: BpsConfig,
        file_paths: dict[str, str],
    ):
        self.generic = generic
        self.name = generic.name
        self.config = config
        self.file_paths = file_paths
        self.future = None
        self.done = False
        log_dir = os.path.join(get_bps_config_value(self.config, "submitPath", str, required=True), "logs")
        self.stdout = os.path.join(log_dir, self.name + ".stdout")
        self.stderr = os.path.join(log_dir, self.name + ".stderr")

    def __reduce__(self):
        """Recipe for pickling"""
        return type(self), (self.generic, self.config, self.file_paths)

    def get_command_line(self, allow_stage=True) -> str:
        """Get the bash command-line to run to execute this job

        Parameters
        ----------
        allow_stage : `bool`
            Allow staging of execution butler? This is not appropriate for the
            initial or final jobs that run on the local nodes.

        Returns
        -------
        command : `str`
            Command-line to execute for job.
        """
        command: str = self.generic.executable.src_uri + " " + self.generic.arguments
        if not allow_stage:
            return command
        exec_butler_dir = get_bps_config_value(self.config, "executionButlerDir", str)
        if not exec_butler_dir or not os.path.isdir(exec_butler_dir):
            # We're not using the execution butler
            return command

        # Add commands to copy the execution butler.
        # This keeps workers from overloading the sqlite database.
        # The copy can be deleted once we're done, because the original
        # execution butler contains everything that's required.
        job_dir = os.path.join(os.path.dirname(exec_butler_dir), self.name)
        # Set the butlerConfig field to the location of the job-specific copy.
        command = command.replace("<FILE:butlerConfig>", job_dir)
        return dedent(
            f"""
            if [[ ! -d {job_dir} ]]; then mkdir -p {job_dir}; fi
            cp {exec_butler_dir}/* {job_dir}
            {command}
            retcode=$?
            rm -rf {job_dir}
            exit $retcode
            """
        )
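
    # For a job named "isr_903342_10" with (hypothetical)
    # executionButlerDir=/repo/submit/EXEC_REPO, the staged command produced
    # above looks roughly like:
    #
    #     if [[ ! -d /repo/submit/isr_903342_10 ]]; then mkdir -p /repo/submit/isr_903342_10; fi
    #     cp /repo/submit/EXEC_REPO/* /repo/submit/isr_903342_10
    #     pipetask run -b /repo/submit/isr_903342_10 ...
    #     retcode=$?
    #     rm -rf /repo/submit/isr_903342_10
    #     exit $retcode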

    def evaluate_command_line(self, command: str) -> str:
        """Evaluate the bash command-line

        BPS provides a command-line with symbolic names for BPS variables,
        environment variables and files. Here, we replace those symbolic names
        with the actual values, to provide a concrete command that can be
        executed.

        In replacing file paths, we are implicitly assuming that we are working
        on a shared file system, i.e., that workers can see the butler
        directory, and that files do not need to be staged to the worker.

        Parameters
        ----------
        command : `str`
            Command-line to execute, from BPS.

        Returns
        -------
        command : `str`
            Command ready for execution on a worker.
        """
        command = command.format(**self.generic.cmdvals)  # BPS variables

        # Make sure *all* symbolic names are resolved.
        #
        # In general, actual values for some symbolic names may contain other
        # symbolic names. As a result, more than one iteration may be required
        # to resolve all symbolic names. For example, an actual value for
        # a filename may contain a symbolic name for an environment variable.
        prev_command = command
        while True:
            command = re.sub(_env_regex, r"${\g<1>}", command)  # Environment variables
            command = re.sub(_file_regex, lambda match: self.file_paths[match.group(1)], command)  # Files
            if prev_command == command:
                break
            prev_command = command

        return command
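
    # A worked example with hypothetical values: given
    #
    #     command    = "pipetask run -b <FILE:butlerConfig> -i {inCollection}"
    #     cmdvals    = {"inCollection": "HSC/raw/<ENV:RERUN>"}
    #     file_paths = {"butlerConfig": "/repo/submit/EXEC_REPO"}
    #
    # the .format() call first expands the BPS variable, and the loop then
    # rewrites the <ENV:...> and <FILE:...> tokens, iterating until no further
    # substitutions occur, yielding
    #
    #     "pipetask run -b /repo/submit/EXEC_REPO -i HSC/raw/${RERUN}"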

    def get_resources(self) -> dict[str, Any]:
        """Return what resources are required for executing this job"""
        resources = {}
        for bps_name, parsl_name, scale in (
            ("request_memory", "memory", None),  # Both BPS and WorkQueueExecutor use MB
            ("request_cpus", "cores", None),
            ("request_disk", "disk", None),  # Both are MB
            ("request_walltime", "running_time_min", None),  # Both are minutes
        ):
            value = getattr(self.generic, bps_name)
            if scale is not None:
                value *= scale
            resources[parsl_name] = value
        return resources
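
    # For instance, a generic job with request_memory=4096, request_cpus=1,
    # request_disk=2048 and request_walltime=60 (hypothetical values) yields
    #
    #     {"memory": 4096, "cores": 1, "disk": 2048, "running_time_min": 60}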

    def get_future(
        self,
        app: BashApp,
        inputs: list[Future],
        command_prefix: str | None = None,
        add_resources: bool = False,
    ) -> Future | None:
        """Get the parsl app future for the job

        This effectively queues the job for execution by a worker, subject to
        dependencies.

        Parameters
        ----------
        app : callable
            A parsl bash_app decorator to use.
        inputs : list of `Future`
            Dependencies to be satisfied before executing this job.
        command_prefix : `str`, optional
            Bash commands to execute before the job command, e.g., for setting
            the environment.
        add_resources : `bool`
            Add resource specification when submitting the job? This is only
            appropriate for the ``WorkQueue`` executor; other executors will
            raise an exception.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None` if
            the job has already been done (e.g., by ``run_local``).
        """
        if self.done:
            return None  # Nothing to do
        if not self.future:
            command = self.get_command_line()
            command = self.evaluate_command_line(command)
            if command_prefix:
                command = command_prefix + "\n" + command
            resources = self.get_resources() if add_resources else None

            # Add a layer of indirection to which we can add a useful name.
            # This name is used by parsl for tracking workflow status.
            func = partial(run_command)
            setattr(func, "__name__", self.generic.label)

            self.future = app(func)(
                command,
                inputs=inputs,
                stdout=self.stdout,
                stderr=self.stderr,
                parsl_resource_specification=resources,
            )
        return self.future
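
    # A hedged usage sketch (the ``job`` object and executor label are
    # assumptions; a parsl configuration must already be loaded):
    #
    #     from parsl import bash_app
    #
    #     app = bash_app(executors=["work_queue"])
    #     future = job.get_future(app, inputs=[], add_resources=True)
    #     if future is not None:
    #         future.exception()  # block until the job finishes; None on success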

    def run_local(self):
        """Run the command locally

        This is intended to support jobs that should not be done by a
        worker.
        """
        if self.done:  # Nothing to do
            return
        command = self.get_command_line(False)
        command = self.evaluate_command_line(command)
        with open(self.stdout, "w") as stdout, open(self.stderr, "w") as stderr:
            subprocess.check_call(command, shell=True, executable="/bin/bash", stdout=stdout, stderr=stderr)
        self.done = True