Coverage for python/lsst/ctrl/bps/parsl/workflow.py: 24% (108 statements)

# This file is part of ctrl_bps_parsl.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
import os
import pickle
from collections.abc import Iterable, Mapping

import parsl
import parsl.config
from lsst.ctrl.bps import BaseWmsWorkflow, BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.app import bash_app
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_bps_config_value, get_workflow_filename, set_parsl_logging
from .job import ParslJob, get_file_paths
from .site import SiteConfig

__all__ = ("ParslWorkflow", "get_parsl_config")

_log = logging.getLogger("lsst.ctrl.bps.parsl")


def get_parsl_config(config: BpsConfig) -> parsl.config.Config:
    """Construct a parsl configuration from the BPS configuration.

    For details on the site configuration, see `SiteConfig`. For details on
    the monitor configuration, see `SiteConfig.get_monitor`.

    The number of retries is set from the ``site.<computeSite>.retries``
    value.

    Parameters
    ----------
    config : `BpsConfig`
        BPS configuration.

    Returns
    -------
    parsl_config : `parsl.config.Config`
        Parsl configuration.
    """
    site = SiteConfig.from_config(config)
    executors = site.get_executors()
    retries = get_bps_config_value(site.site, "retries", int, 1)
    monitor = site.get_monitor()
    return parsl.config.Config(
        executors=executors, monitoring=monitor, retries=retries, checkpoint_mode="task_exit"
    )
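
# A minimal usage sketch (assumptions: ``submit.yaml`` is a hypothetical BPS
# submit file whose ``site.<computeSite>`` section is fully populated):
#
#     bps_config = BpsConfig("submit.yaml")
#     parsl_config = get_parsl_config(bps_config)
#     parsl.load(parsl_config)  # start parsl with this configuration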


class ParslWorkflow(BaseWmsWorkflow):
    """Parsl-based workflow object to manage execution of workflow.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    path : `str`
        Path prefix for workflow output files.
    jobs : `dict` mapping `str` to `ParslJob`
        Jobs to be executed.
    parents : `dict` mapping `str` to iterable of `str`
        Dependency tree. Keys are job names, and values are the names of the
        jobs that must be executed before the keyed job can be executed.
    endpoints : iterable of `str`
        Endpoints of the dependency tree. These jobs (specified by name) have
        no children.
    final : `ParslJob`, optional
        Final job to be done, e.g., to merge the execution butler. This is
        done locally.
    """

    def __init__(
        self,
        name: str,
        config: BpsConfig,
        path: str,
        jobs: dict[str, ParslJob],
        parents: Mapping[str, Iterable[str]],
        endpoints: Iterable[str],
        final: ParslJob | None = None,
    ):
        super().__init__(name, config)

        self.path = path
        self.bps_config = config
        self.parsl_config = get_parsl_config(config)
        self.site = SiteConfig.from_config(config)
        self.dfk: parsl.DataFlowKernel | None = None  # type: ignore
        self.command_prefix = self.site.get_command_prefix()

        # These are function decorators: one bash_app per executor, keyed by
        # executor label, so jobs can be routed to the appropriate executor.
        self.apps: dict[str, BashApp] = {
            ex.label: bash_app(  # type: ignore
                executors=[ex.label], cache=True, ignore_for_cache=["stderr", "stdout"]
            )
            for ex in self.parsl_config.executors
        }

        self.jobs = jobs
        self.parents = parents
        self.endpoints = endpoints
        self.final = final

    def __reduce__(self):
        """Recipe for pickle."""
        return type(self), (
            self.name,
            self.bps_config,
            self.path,
            self.jobs,
            self.parents,
            self.endpoints,
            self.final,
        )

    @classmethod
    def from_generic_workflow(
        cls, config: BpsConfig, generic_workflow: GenericWorkflow, out_prefix: str, service_class: str
    ) -> BaseWmsWorkflow:
        """Create a ParslWorkflow object from a BPS GenericWorkflow.

        Parameters
        ----------
        config : `BpsConfig`
            Configuration of the workflow.
        generic_workflow : `lsst.ctrl.bps.generic_workflow.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`
            Prefix for workflow output files.
        service_class : `str`
            Full module name of WMS service class that created this workflow.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        # Generate list of jobs
        jobs: dict[str, ParslJob] = {}
        for job_name in generic_workflow:
            generic_job = generic_workflow.get_job(job_name)
            assert generic_job.name not in jobs
            jobs[job_name] = ParslJob(generic_job, config, get_file_paths(generic_workflow, job_name))

        parents = {name: set(generic_workflow.predecessors(name)) for name in jobs}
        endpoints = [name for name in jobs if generic_workflow.out_degree(name) == 0]

        # Add final job: execution butler merge
        job = generic_workflow.get_final()
        final: ParslJob | None = None
        if job is not None:
            assert isinstance(job, GenericWorkflowJob)
            final = ParslJob(job, config, get_file_paths(generic_workflow, job.name))

        return cls(generic_workflow.name, config, out_prefix, jobs, parents, endpoints, final)
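
    # A construction sketch (assuming ``config`` and ``generic_workflow``
    # come from the BPS submission machinery; the output prefix and the
    # service-class name below are hypothetical):
    #
    #     workflow = ParslWorkflow.from_generic_workflow(
    #         config, generic_workflow, "/path/to/submit",
    #         "lsst.ctrl.bps.parsl.ParslService",
    #     )
    #     workflow.write("/path/to/submit")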

    def write(self, out_prefix: str):
        """Write the workflow state.

        This, in combination with the parsl checkpoint files, can be used to
        restart a workflow that was interrupted.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        """
        filename = get_workflow_filename(out_prefix)
        _log.info("Writing workflow with ID=%s", out_prefix)
        with open(filename, "wb") as fd:
            pickle.dump(self, fd)

    @classmethod
    def read(cls, out_prefix: str) -> "ParslWorkflow":
        """Construct a workflow from the saved workflow state.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        filename = get_workflow_filename(out_prefix)
        with open(filename, "rb") as fd:
            self = pickle.load(fd)
        assert isinstance(self, cls)
        return self
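
    # A restart sketch (assuming a previous run wrote its state under the
    # same prefix via ``write``; the path below is hypothetical):
    #
    #     workflow = ParslWorkflow.read("/path/to/submit")
    #     workflow.restart()  # resume from the last parsl checkpoint
    #     workflow.run(block=True)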

    def run(self, block: bool = True) -> list[Future | None]:
        """Run the workflow.

        Parameters
        ----------
        block : `bool`, optional
            Block returning from this method until the workflow is complete?
            If `False`, jobs may still be running when this returns, and it
            is the user's responsibility to call the ``finalize_jobs`` and
            ``shutdown`` methods when they are complete.

        Returns
        -------
        futures : `list` of `Future` or `None`
            `Future` objects linked to the execution of the endpoint jobs;
            entries are `None` for endpoint jobs reserved to run locally.
        """
        futures = [self.execute(name) for name in self.endpoints]
        if block:
            # Calling .exception() for each future blocks returning
            # from this method until all the jobs have executed or
            # raised an error. This is needed for running in a
            # non-interactive Python process that would otherwise end
            # before the futures resolve.
            for ff in futures:
                if ff is not None:
                    ff.exception()
            self.shutdown()
            self.finalize_jobs()
        return futures
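
    # A non-blocking usage sketch (assuming the workflow was started with
    # ``start`` or ``restart``):
    #
    #     futures = workflow.run(block=False)
    #     ...  # do other work while jobs execute
    #     for future in futures:
    #         if future is not None:
    #             future.exception()  # wait for the job to finish
    #     workflow.shutdown()
    #     workflow.finalize_jobs()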

    def execute(self, name: str) -> Future | None:
        """Execute a job.

        Parameters
        ----------
        name : `str`
            Name of job to execute.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None`
            if the job is being reserved to run locally.
        """
        if name in ("pipetaskInit", "mergeExecutionButler"):
            # These get done outside of parsl
            return None
        job = self.jobs[name]
        inputs = [self.execute(parent) for parent in self.parents[name]]
        executors = self.parsl_config.executors
        if len(executors) > 1:
            label = self.site.select_executor(job)
        else:
            label = executors[0].label
        return job.get_future(
            self.apps[label],
            [ff for ff in inputs if ff is not None],
            self.command_prefix,
            self.site.add_resources,
        )
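
    # For illustration (hypothetical job names): given
    # parents = {"c": {"a", "b"}, "a": set(), "b": set()}, calling
    # execute("c") first recurses into execute("a") and execute("b"), and
    # the resulting futures become inputs of the bash_app invocation for
    # "c", so parsl will not start "c" until both parents have completed.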

    def load_dfk(self):
        """Load the parsl data flow kernel (DFK).

        This starts parsl.
        """
        if self.dfk is not None:
            raise RuntimeError("Workflow has already started.")
        set_parsl_logging(self.bps_config)
        self.dfk = parsl.load(self.parsl_config)

    def start(self):
        """Start the workflow."""
        self.initialize_jobs()
        self.load_dfk()

    def restart(self):
        """Restart the workflow after interruption."""
        self.parsl_config.checkpoint_files = parsl.utils.get_last_checkpoint()
        self.load_dfk()

    def shutdown(self):
        """Shut down the workflow.

        This stops parsl.
        """
        if self.dfk is None:
            raise RuntimeError("Workflow not started.")
        self.dfk.cleanup()
        self.dfk = None
        parsl.DataFlowKernelLoader.clear()

    def initialize_jobs(self):
        """Run initial jobs.

        These jobs are run locally before any other jobs are submitted to
        parsl.

        This is used to set up the butler.
        """
        job = self.jobs.get("pipetaskInit", None)
        if job is not None:
            os.makedirs(os.path.join(self.path, "logs"), exist_ok=True)
            job.run_local()

    def finalize_jobs(self):
        """Run final jobs.

        These jobs are run locally after all other jobs are complete.

        This is used to merge the execution butler.
        """
        if self.final is not None and not self.final.done:
            self.final.run_local()