Coverage for python/lsst/ctrl/bps/parsl/job.py: 23%
80 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-09 09:52 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-09 09:52 +0000
1# This file is part of ctrl_bps_parsl.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28import os
29import re
30import subprocess
31from collections.abc import Sequence
32from functools import partial
33from textwrap import dedent
34from typing import Any
36from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
37from parsl.app.bash import BashApp
38from parsl.app.futures import Future
40from .configuration import get_bps_config_value
# Names exported by ``from <this module> import *``.
__all__ = ("get_file_paths", "ParslJob")
# Regexes for the symbolic names found in BPS job command-lines. The name is
# matched only up to the first ">" (and never across whitespace), so that
# several placeholders inside one whitespace-free token, e.g.
# "<ENV:A>/<ENV:B>", are each matched individually instead of being captured
# as one bogus name.
_env_regex = re.compile(r"<ENV:([^\s>]+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
_file_regex = re.compile(r"<FILE:([^\s>]+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines
48def run_command(
49 command_line: str,
50 inputs: Sequence[Future] = (),
51 stdout: str | None = None,
52 stderr: str | None = None,
53 parsl_resource_specification: dict[str, Any] | None = None,
54) -> str:
55 """Run a command
57 This function exists to get information into parsl, through the ``inputs``,
58 ``stdout`` and ``stderr`` parameters. It needs to be wrapped by a parsl
59 ``bash_app`` decorator before use, after which it will return a `Future`.
61 Parameters
62 ----------
63 command_line : `str`
64 Command-line to have parsl run.
65 inputs : list of `Future`
66 Other commands that must have run before this.
67 stdout, stderr : `str`, optional
68 Filenames for stdout and stderr.
69 parsl_resource_specification : `dict`, optional
70 Resources required for job.
72 Returns
73 -------
74 command_line : `str`
75 Command-line to have parsl run.
76 """
77 return command_line
def get_file_paths(workflow: GenericWorkflow, name: str) -> dict[str, str]:
    """Collect the paths of a job's input files.

    Parameters
    ----------
    workflow : `GenericWorkflow`
        BPS workflow, which is asked for the inputs of the job.
    name : `str`
        Name of the job of interest.

    Returns
    -------
    paths : `dict` mapping `str` to `str`
        Source URI of each input file of the job, keyed by the file's
        symbolic name.
    """
    paths: dict[str, str] = {}
    for job_input in workflow.get_job_inputs(name):
        symbolic_name = job_input.name
        paths[symbolic_name] = job_input.src_uri
    return paths
class ParslJob:
    """Job to execute with parsl.

    Parameters
    ----------
    generic : `GenericWorkflowJob`
        BPS job information.
    config : `BpsConfig`
        BPS configuration.
    file_paths : `dict` mapping `str` to `str`
        File paths for job, indexed by symbolic name.
    """

    def __init__(
        self,
        generic: GenericWorkflowJob,
        config: BpsConfig,
        file_paths: dict[str, str],
    ):
        self.generic = generic
        self.name = generic.name
        self.config = config
        self.file_paths = file_paths
        self.future = None  # Cached by get_future once the job has been queued
        self.done = False  # Set by run_local once the job has been run
        # Logs go in a "logs" directory under the (required) submit path.
        log_dir = os.path.join(get_bps_config_value(self.config, "submitPath", str, required=True), "logs")
        self.stdout = os.path.join(log_dir, self.name + ".stdout")
        self.stderr = os.path.join(log_dir, self.name + ".stderr")

    def __reduce__(self):
        """Recipe for pickling.

        Only the constructor arguments are pickled, so ``future`` and
        ``done`` are reset to their initial values on unpickling.
        """
        return type(self), (self.generic, self.config, self.file_paths)

    def get_command_line(self, allow_stage=True) -> str:
        """Get the bash command-line to run to execute this job.

        Parameters
        ----------
        allow_stage : `bool`
            Allow staging of execution butler? This is not appropriate for the
            initial or final jobs that run on the local nodes.

        Returns
        -------
        command : `str`
            Command-line to execute for job.
        """
        command: str = self.generic.executable.src_uri + " " + self.generic.arguments
        if not allow_stage:
            return command
        exec_butler_dir = get_bps_config_value(self.config, "executionButlerDir", str)
        if not exec_butler_dir or not os.path.isdir(exec_butler_dir):
            # We're not using the execution butler
            return command

        # Add commands to copy the execution butler.
        # This keeps workers from overloading the sqlite database.
        # The copy can be deleted once we're done, because the original
        # execution butler contains everything that's required.
        job_dir = os.path.join(os.path.dirname(exec_butler_dir), self.name)
        # Set the butlerConfig field to the location of the job-specific copy.
        command = command.replace("<FILE:butlerConfig>", job_dir)
        # The job's exit status is captured before the cleanup so that it is
        # the status of the job, not of ``rm``, that the script exits with.
        return dedent(
            f"""
            if [[ ! -d {job_dir} ]]; then mkdir -p {job_dir}; fi
            cp {exec_butler_dir}/* {job_dir}
            {command}
            retcode=$?
            rm -rf {job_dir}
            exit $retcode
            """
        )

    def evaluate_command_line(self, command: str) -> str:
        """Evaluate the bash command-line.

        BPS provides a command-line with symbolic names for BPS variables,
        environment variables and files. Here, we replace those symbolic names
        with the actual values, to provide a concrete command that can be
        executed.

        In replacing file paths, we are implicitly assuming that we are working
        on a shared file system, i.e., that workers can see the butler
        directory, and that files do not need to be staged to the worker.

        Parameters
        ----------
        command : `str`
            Command-line to execute, from BPS.

        Returns
        -------
        command : `str`
            Command ready for execution on a worker.

        Raises
        ------
        KeyError
            Raised if the command refers to a BPS variable that is not in
            the job's ``cmdvals``, or to a file that is not in
            ``file_paths``.
        """
        command = command.format(**self.generic.cmdvals)  # BPS variables

        # Make sure *all* symbolic names are resolved.
        #
        # In general, actual values for some symbolic names may contain other
        # symbolic names. As a result, more than one iteration may be required
        # to resolve all symbolic names. For example, an actual value for
        # a filename may contain a symbolic name for an environment variable.
        while True:
            resolved = _env_regex.sub(r"${\g<1>}", command)  # Environment variables
            resolved = _file_regex.sub(lambda match: self.file_paths[match.group(1)], resolved)  # Files
            if resolved == command:
                # A full pass changed nothing: everything is resolved.
                return resolved
            command = resolved

    def get_resources(self) -> dict[str, Any]:
        """Return what resources are required for executing this job.

        Returns
        -------
        resources : `dict` mapping `str` to `Any`
            Resource requests of the BPS job, keyed by the parsl resource
            name. A request that is `None` on the BPS job is passed through
            as `None`.
        """
        resources = {}
        for bps_name, parsl_name, scale in (
            ("request_memory", "memory", None),  # Both BPS and WorkQueueExecutor use MB
            ("request_cpus", "cores", None),
            ("request_disk", "disk", None),  # Both are MB
            ("request_walltime", "running_time_min", None),  # Both are minutes
        ):
            value = getattr(self.generic, bps_name)
            # Leave a `None` request alone: it cannot be scaled.
            if scale is not None and value is not None:
                value *= scale
            resources[parsl_name] = value
        return resources

    def get_future(
        self,
        app: BashApp,
        inputs: list[Future],
        command_prefix: str | None = None,
        add_resources: bool = False,
    ) -> Future | None:
        """Get the parsl app future for the job.

        This effectively queues the job for execution by a worker, subject to
        dependencies.

        Parameters
        ----------
        app : callable
            A parsl bash_app decorator to use.
        inputs : list of `Future`
            Dependencies to be satisfied before executing this job.
        command_prefix : `str`, optional
            Bash commands to execute before the job command, e.g., for setting
            the environment.
        add_resources : `bool`
            Add resource specification when submitting the job? This is only
            appropriate for the ``WorkQueue`` executor; other executors will
            raise an exception.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None` if
            the job has already been done (e.g., by ``run_local``).
        """
        if self.done:
            return None  # Nothing to do
        if self.future is None:
            command = self.get_command_line()
            command = self.evaluate_command_line(command)
            if command_prefix:
                command = command_prefix + "\n" + command
            resources = self.get_resources() if add_resources else None

            # Add a layer of indirection to which we can add a useful name.
            # This name is used by parsl for tracking workflow status.
            func = partial(run_command)
            func.__name__ = self.generic.label

            # The future is cached, so the job is only queued once.
            self.future = app(func)(
                command,
                inputs=inputs,
                stdout=self.stdout,
                stderr=self.stderr,
                parsl_resource_specification=resources,
            )
        return self.future

    def run_local(self):
        """Run the command locally.

        This is intended to support jobs that should not be done by a
        worker. The job's stdout and stderr are written to the job's log
        files, and the job is marked as done on success.

        Raises
        ------
        subprocess.CalledProcessError
            Raised if the command exits with a non-zero status.
        """
        if self.done:  # Nothing to do
            return
        command = self.get_command_line(False)
        command = self.evaluate_command_line(command)
        # The log directory may not exist yet, so create it before opening
        # the log files.
        for log_path in (self.stdout, self.stderr):
            log_dir = os.path.dirname(log_path)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
        with open(self.stdout, "w") as stdout, open(self.stderr, "w") as stderr:
            subprocess.check_call(command, shell=True, executable="/bin/bash", stdout=stdout, stderr=stderr)
        self.done = True