Coverage for python/lsst/ctrl/bps/parsl/job.py: 22%
90 statements
# This file is part of ctrl_bps_parsl.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import re
import subprocess
from collections import defaultdict
from collections.abc import Sequence
from functools import partial
from textwrap import dedent
from typing import Any

from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_bps_config_value

__all__ = ("get_file_paths", "ParslJob")

_env_regex = re.compile(r"<ENV:(\S+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
_file_regex = re.compile(r"<FILE:(\S+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines


def run_command(
    command_line: str,
    inputs: Sequence[Future] = (),
    stdout: str | None = None,
    stderr: str | None = None,
    parsl_resource_specification: dict[str, Any] | None = None,
) -> str:
    """Run a command.

    This function exists to get information into parsl, through the ``inputs``,
    ``stdout`` and ``stderr`` parameters. It needs to be wrapped by a parsl
    ``bash_app`` decorator before use, after which it will return a `Future`.

    Parameters
    ----------
    command_line : `str`
        Command-line to have parsl run.
    inputs : list of `Future`
        Other commands that must have run before this.
    stdout, stderr : `str`, optional
        Filenames for stdout and stderr.
    parsl_resource_specification : `dict`, optional
        Resources required for job.

    Returns
    -------
    command_line : `str`
        Command-line to have parsl run.
    """
    return command_line
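
# Example of intended use (illustrative sketch, not part of this module):
# ``run_command`` must first be wrapped by parsl's ``bash_app`` decorator;
# with a parsl configuration loaded, calling the wrapped version queues the
# command and returns a ``Future`` instead of running it immediately::
#
#     from parsl import bash_app
#
#     app = bash_app(run_command)
#     future = app("echo hello", stdout="hello.out", stderr="hello.err")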


def get_file_paths(workflow: GenericWorkflow, name: str) -> dict[str, str]:
    """Extract file paths for a job.

    Parameters
    ----------
    workflow : `GenericWorkflow`
        BPS workflow that knows the file paths.
    name : `str`
        Job name.

    Returns
    -------
    paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """
    return {ff.name: ff.src_uri for ff in workflow.get_job_inputs(name)}


class ParslJob:
    """Job to execute with parsl.

    Parameters
    ----------
    generic : `GenericWorkflowJob`
        BPS job information.
    config : `BpsConfig`
        BPS configuration.
    file_paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """

    def __init__(
        self,
        generic: GenericWorkflowJob,
        config: BpsConfig,
        file_paths: dict[str, str],
    ):
        self.generic = generic
        self.name = generic.name
        self.config = config
        self.file_paths = file_paths
        self.future = None
        self.done = False

        # Determine directory for job stdout and stderr
        log_dir = os.path.join(get_bps_config_value(self.config, "submitPath", str, required=True), "logs")
        _, template = self.config.search(
            "subDirTemplate",
            opt={
                "curvals": {"curr_site": self.config["computeSite"], "curr_cluster": self.generic.label},
                "replaceVars": False,
                "default": "",
            },
        )
        job_vals = defaultdict(str)
        job_vals["label"] = self.generic.label
        if self.generic.tags:
            job_vals.update(self.generic.tags)
        subdir = template.format_map(job_vals)
        # Call normpath just to make the paths easier to read, as templates
        # tend to have variables that aren't used by every job. Avoid calling
        # it on an empty string, because that turns it into a dot.
        same_part = os.path.normpath(os.path.join(log_dir, subdir, self.name))
        self.stdout = same_part + ".stdout"
        self.stderr = same_part + ".stderr"
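        # For illustration (hypothetical values): with submitPath
        # "/path/to/submit", an empty subDirTemplate and a job named
        # "forcedPhot", the logs land in /path/to/submit/logs/forcedPhot.stdout
        # and .stderr.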

    def __reduce__(self):
        """Recipe for pickling."""
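        # Only the constructor arguments are pickled; the transient execution
        # state (self.future, self.done) is not preserved across a round trip.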
        return type(self), (self.generic, self.config, self.file_paths)

    def get_command_line(self, allow_stage=True) -> str:
        """Get the bash command-line to run to execute this job.

        Parameters
        ----------
        allow_stage : `bool`
            Allow staging of execution butler? This is not appropriate for the
            initial or final jobs that run on the local nodes.

        Returns
        -------
        command : `str`
            Command-line to execute for job.
        """
        command: str = self.generic.executable.src_uri + " " + self.generic.arguments
        if not allow_stage:
            return command
        exec_butler_dir = get_bps_config_value(self.config, "executionButlerDir", str)
        if not exec_butler_dir or not os.path.isdir(exec_butler_dir):
            # We're not using the execution butler
            return command

        # Add commands to copy the execution butler.
        # This keeps workers from overloading the sqlite database.
        # The copy can be deleted once we're done, because the original
        # execution butler contains everything that's required.
        job_dir = os.path.join(os.path.dirname(exec_butler_dir), self.name)
        # Set the butlerConfig field to the location of the job-specific copy.
        command = command.replace("<FILE:butlerConfig>", job_dir)
        return dedent(
            f"""
            if [[ ! -d {job_dir} ]]; then mkdir -p {job_dir}; fi
            cp {exec_butler_dir}/* {job_dir}
            {command}
            retcode=$?
            rm -rf {job_dir}
            exit $retcode
            """
        )

    def evaluate_command_line(self, command: str) -> str:
        """Evaluate the bash command-line.

        BPS provides a command-line with symbolic names for BPS variables,
        environment variables and files. Here, we replace those symbolic names
        with the actual values, to provide a concrete command that can be
        executed.

        In replacing file paths, we are implicitly assuming that we are working
        on a shared file system, i.e., that workers can see the butler
        directory, and that files do not need to be staged to the worker.

        Parameters
        ----------
        command : `str`
            Command-line to execute, from BPS.

        Returns
        -------
        command : `str`
            Command ready for execution on a worker.
        """
        command = command.format(**self.generic.cmdvals)  # BPS variables

        # Make sure *all* symbolic names are resolved.
        #
        # In general, actual values for some symbolic names may contain other
        # symbolic names. As a result, more than one iteration may be required
        # to resolve all symbolic names. For example, an actual value for
        # a filename may contain a symbolic name for an environment variable.
        prev_command = command
        while True:
            command = re.sub(_env_regex, r"${\g<1>}", command)  # Environment variables
            command = re.sub(_file_regex, lambda match: self.file_paths[match.group(1)], command)  # Files
            if prev_command == command:
                break
            prev_command = command

        return command
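
    # Illustrative example of the substitution (paths and names are made up):
    # with file_paths = {"butlerConfig": "/repo/butler.yaml"}, the command
    #     "pipetask ... -b <FILE:butlerConfig> --log-file <ENV:LOGDIR>/job.log"
    # becomes
    #     "pipetask ... -b /repo/butler.yaml --log-file ${LOGDIR}/job.log"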

    def get_resources(self) -> dict[str, Any]:
        """Return what resources are required for executing this job."""
        resources = {}
        for bps_name, parsl_name, scale in (
            ("request_memory", "memory", None),  # Both BPS and WorkQueueExecutor use MB
            ("request_cpus", "cores", None),
            ("request_disk", "disk", None),  # Both are MB
            ("request_walltime", "running_time_min", None),  # Both are minutes
        ):
            value = getattr(self.generic, bps_name)
            if scale is not None:
                value *= scale
            resources[parsl_name] = value
        return resources
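
    # For example (illustrative numbers): a job requesting 2048 MB of memory,
    # 1 core, 0 MB of disk and 30 minutes of walltime yields
    # {"memory": 2048, "cores": 1, "disk": 0, "running_time_min": 30}.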

    def get_future(
        self,
        app: BashApp,
        inputs: list[Future],
        command_prefix: str | None = None,
        add_resources: bool = False,
    ) -> Future | None:
        """Get the parsl app future for the job.

        This effectively queues the job for execution by a worker, subject to
        dependencies.

        Parameters
        ----------
        app : callable
            A parsl bash_app decorator to use.
        inputs : list of `Future`
            Dependencies to be satisfied before executing this job.
        command_prefix : `str`, optional
            Bash commands to execute before the job command, e.g., for setting
            the environment.
        add_resources : `bool`
            Add resource specification when submitting the job? This is only
            appropriate for the ``WorkQueue`` executor; other executors will
            raise an exception.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None` if
            the job has already been done (e.g., by ``run_local``).
        """
        if self.done:
            return None  # Nothing to do
        if not self.future:
            command = self.get_command_line()
            command = self.evaluate_command_line(command)
            if command_prefix:
                command = command_prefix + "\n" + command
            resources = self.get_resources() if add_resources else None

            # Add a layer of indirection to which we can add a useful name.
            # This name is used by parsl for tracking workflow status.
            func = partial(run_command)
            setattr(func, "__name__", self.generic.label)

            self.future = app(func)(
                command,
                inputs=inputs,
                stdout=self.stdout,
                stderr=self.stderr,
                parsl_resource_specification=resources,
            )
        return self.future
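
    # Illustrative usage (the executor label and ``parent_futures`` list are
    # assumptions, not part of this module)::
    #
    #     from parsl import bash_app
    #
    #     app = bash_app(executors=["work_queue"], cache=True)
    #     future = job.get_future(app, inputs=parent_futures, add_resources=True)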

    def run_local(self):
        """Run the command locally.

        This is intended to support jobs that should not be done by a
        worker.
        """
        if self.done:  # Nothing to do
            return
        command = self.get_command_line(False)
        command = self.evaluate_command_line(command)
        os.makedirs(os.path.dirname(self.stdout), exist_ok=True)
        os.makedirs(os.path.dirname(self.stderr), exist_ok=True)
        with open(self.stdout, "w") as stdout, open(self.stderr, "w") as stderr:
            subprocess.check_call(command, shell=True, executable="/bin/bash", stdout=stdout, stderr=stderr)
        self.done = True