Coverage for python/lsst/ctrl/bps/parsl/workflow.py: 24% (105 statements)
# This file is part of ctrl_bps_parsl.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
import os
import pickle
from collections.abc import Iterable, Mapping

import parsl
import parsl.config
from lsst.ctrl.bps import BaseWmsWorkflow, BpsConfig, GenericWorkflow, GenericWorkflowJob
from parsl.app.app import bash_app
from parsl.app.bash import BashApp
from parsl.app.futures import Future

from .configuration import get_workflow_filename, set_parsl_logging
from .job import ParslJob, get_file_paths
from .site import SiteConfig

__all__ = ("ParslWorkflow", "get_parsl_config")

_log = logging.getLogger("lsst.ctrl.bps.parsl")


def get_parsl_config(config: BpsConfig) -> parsl.config.Config:
    """Construct parsl configuration from BPS configuration.

    For details on the site configuration, see `SiteConfig`. For details on
    the monitor configuration, see ``get_parsl_monitor``.

    `SiteConfig` provides an implementation of the method
    ``get_parsl_config`` which returns a Parsl configuration with sensible
    defaults. Subclasses of `SiteConfig` can override that method to
    configure Parsl in a way specific to the site.

    Parameters
    ----------
    config : `BpsConfig`
        BPS configuration.

    Returns
    -------
    parsl_config : `parsl.config.Config`
        Parsl configuration.
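
    Examples
    --------
    A minimal sketch, not a tested recipe; it assumes ``config`` was built
    from a BPS submit YAML whose site settings are understood by
    `SiteConfig` (the file name here is a hypothetical placeholder)::

        from lsst.ctrl.bps import BpsConfig

        config = BpsConfig("bps_submit.yaml")  # hypothetical submit file
        parsl_config = get_parsl_config(config)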
69 """
70 site = SiteConfig.from_config(config)
71 return site.get_parsl_config()


class ParslWorkflow(BaseWmsWorkflow):
    """Parsl-based workflow object to manage execution of workflow.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    path : `str`
        Path prefix for workflow output files.
    jobs : `dict` mapping `str` to `ParslJob`
        Jobs to be executed.
    parents : `dict` mapping `str` to iterable of `str`
        Dependency tree. Keys are job names, and values are the names of the
        jobs that must be executed before the keyed job can be executed.
    endpoints : iterable of `str`
        Endpoints of the dependency tree. These jobs (specified by name) have
        no children.
    final : `ParslJob`, optional
        Final job to be done, e.g., to merge the execution butler. This is
        done locally.
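
    Notes
    -----
    A sketch of the typical lifecycle (the enclosing BPS WMS service
    normally drives these calls; shown here for orientation only)::

        workflow = ParslWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, service_class
        )
        workflow.start()  # runs pipetaskInit locally, then loads parsl
        futures = workflow.run(block=True)  # blocks; shuts down and finalizes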
97 """
99 def __init__(
100 self,
101 name: str,
102 config: BpsConfig,
103 path: str,
104 jobs: dict[str, ParslJob],
105 parents: Mapping[str, Iterable[str]],
106 endpoints: Iterable[str],
107 final: ParslJob | None = None,
108 ):
109 super().__init__(name, config)
111 self.path = path
112 self.bps_config = config
113 self.parsl_config = get_parsl_config(config)
114 self.site = SiteConfig.from_config(config)
115 self.dfk: parsl.DataFlowKernel | None = None # type: ignore
116 self.command_prefix = self.site.get_command_prefix()

        # ``bash_app`` returns a function decorator; we build one per
        # executor label, with caching enabled so that, together with parsl
        # checkpointing, already-completed jobs can be skipped on restart
        # (the stdout/stderr paths are excluded from the cache key).
        self.apps: dict[str, BashApp] = {
            ex.label: bash_app(  # type: ignore
                executors=[ex.label], cache=True, ignore_for_cache=["stderr", "stdout"]
            )
            for ex in self.parsl_config.executors
        }

        self.jobs = jobs
        self.parents = parents
        self.endpoints = endpoints
        self.final = final

    def __reduce__(self):
        """Recipe for pickle."""
        return type(self), (
            self.name,
            self.bps_config,
            self.path,
            self.jobs,
            self.parents,
            self.endpoints,
            self.final,
        )

    @classmethod
    def from_generic_workflow(
        cls, config: BpsConfig, generic_workflow: GenericWorkflow, out_prefix: str, service_class: str
    ) -> BaseWmsWorkflow:
        """Create a ParslWorkflow object from a BPS GenericWorkflow.

        Parameters
        ----------
        config : `BpsConfig`
            Configuration of the workflow.
        generic_workflow : `lsst.ctrl.bps.generic_workflow.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`
            Prefix for workflow output files.
        service_class : `str`
            Full module name of WMS service class that created this workflow.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        # Generate list of jobs
        jobs: dict[str, ParslJob] = {}
        for job_name in generic_workflow:
            generic_job = generic_workflow.get_job(job_name)
            assert generic_job.name not in jobs
            jobs[job_name] = ParslJob(generic_job, config, get_file_paths(generic_workflow, job_name))
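
        # Map each job to the set of jobs that must run before it; the
        # endpoints are the sinks of the DAG (jobs with no successors).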
        parents = {name: set(generic_workflow.predecessors(name)) for name in jobs}
        endpoints = [name for name in jobs if generic_workflow.out_degree(name) == 0]

        # Add final job: execution butler merge
        job = generic_workflow.get_final()
        final: ParslJob | None = None
        if job is not None:
            assert isinstance(job, GenericWorkflowJob)
            final = ParslJob(job, config, get_file_paths(generic_workflow, job.name))

        return cls(generic_workflow.name, config, out_prefix, jobs, parents, endpoints, final)

    def write(self, out_prefix: str):
        """Write workflow state.

        This, in combination with the parsl checkpoint files, can be used to
        restart a workflow that was interrupted.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        """
        filename = get_workflow_filename(out_prefix)
        _log.info("Writing workflow with ID=%s", out_prefix)
        with open(filename, "wb") as fd:
            pickle.dump(self, fd)

    @classmethod
    def read(cls, out_prefix: str) -> "ParslWorkflow":
        """Construct from the saved workflow state.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Returns
        -------
        self : `ParslWorkflow`
            Constructed workflow.
        """
        filename = get_workflow_filename(out_prefix)
        with open(filename, "rb") as fd:
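            # Unpickling goes through ``__reduce__``, i.e., ``__init__`` is
            # re-run and parsl is not yet loaded; call ``restart`` afterwards
            # to resume from the last checkpoint.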
            self = pickle.load(fd)
        assert isinstance(self, cls)
        return self

    def run(self, block: bool = True) -> list[Future | None]:
        """Run the workflow.

        Parameters
        ----------
        block : `bool`, optional
            If `True`, do not return from this method until the workflow is
            complete. If `False`, jobs may still be running when this
            returns, and it is the user's responsibility to call the
            ``finalize_jobs`` and ``shutdown`` methods when they are
            complete.

        Returns
        -------
        futures : `list` of `Future` or `None`
            `Future` objects linked to the execution of the endpoint jobs,
            or `None` for endpoint jobs that are reserved to run locally.
        """
        futures = [self.execute(name) for name in self.endpoints]
        if block:
            # Calling .exception() on each future blocks returning from this
            # method until all the jobs have executed or raised an error.
            # This is needed when running in a non-interactive python process
            # that would otherwise end before the futures resolve.
            for ff in futures:
                if ff is not None:
                    ff.exception()
            self.shutdown()
            self.finalize_jobs()
        return futures

    def execute(self, name: str) -> Future | None:
        """Execute a job.

        Parameters
        ----------
        name : `str`
            Name of job to execute.

        Returns
        -------
        future : `Future` or `None`
            A `Future` object linked to the execution of the job, or `None`
            if the job is being reserved to run locally.
        """
        if name in ("pipetaskInit", "mergeExecutionButler"):
            # These are reserved to run locally, outside of parsl:
            # pipetaskInit via ``initialize_jobs`` and the execution butler
            # merge via ``finalize_jobs``.
            return None
        job = self.jobs[name]
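        # Recurse through the dependency tree to get the parent futures
        # first; ``ParslJob.get_future`` returns any future it has already
        # created, so a shared ancestor is not submitted twice.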
        inputs = [self.execute(parent) for parent in self.parents[name]]
        executors = self.parsl_config.executors
        if len(executors) > 1:
            label = self.site.select_executor(job)
        else:
            label = executors[0].label
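        # ``command_prefix`` (e.g., site-specific environment setup) is
        # prepended to the job command; ``add_resources`` indicates whether
        # to attach a parsl resource specification to the job.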
        return job.get_future(
            self.apps[label],
            [ff for ff in inputs if ff is not None],
            self.command_prefix,
            self.site.add_resources,
        )

    def load_dfk(self):
        """Load the parsl DataFlowKernel (DFK).

        This starts parsl.
        """
        if self.dfk is not None:
            raise RuntimeError("Workflow has already started.")
        set_parsl_logging(self.bps_config)
        self.dfk = parsl.load(self.parsl_config)

    def start(self):
        """Start the workflow."""
        self.initialize_jobs()
        self.load_dfk()

    def restart(self):
        """Restart the workflow after interruption."""
        self.parsl_config.checkpoint_files = parsl.utils.get_last_checkpoint()
        self.load_dfk()

    def shutdown(self):
        """Shut down the workflow.

        This stops parsl.
        """
        if self.dfk is None:
            raise RuntimeError("Workflow not started.")
        self.dfk.cleanup()
        self.dfk = None
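        # Clear the module-level DFK so that a subsequent ``parsl.load`` in
        # this process (e.g., a restart) does not fail.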
        parsl.DataFlowKernelLoader.clear()

    def initialize_jobs(self):
        """Run initial jobs.

        These jobs are run locally before any other jobs are submitted to
        parsl.

        This is used to set up the butler.
        """
        job = self.jobs.get("pipetaskInit", None)
        if job is not None:
            os.makedirs(os.path.join(self.path, "logs"))
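            # pipetaskInit must complete before any parsl apps are
            # submitted, since it sets up the butler for the workflow.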
            job.run_local()

    def finalize_jobs(self):
        """Run final jobs.

        These jobs are run locally after all other jobs are complete.

        This is used to merge the execution butler.
        """
        if self.final is not None and not self.final.done:
            self.final.run_local()