Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 13%
143 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-14 02:41 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-14 02:41 -0800
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21"""Interface between the generic workflow and the PanDA/iDDS workflow system.
22"""
25__all__ = ["PanDAService", "PandaBpsWmsWorkflow"]
28import json
29import logging
30import os
31import pickle
32import re
34from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
35from lsst.ctrl.bps.panda.constants import PANDA_DEFAULT_MAX_COPY_WORKERS
36from lsst.ctrl.bps.panda.utils import (
37 add_final_idds_work,
38 add_idds_work,
39 copy_files_for_distribution,
40 get_idds_client,
41 get_idds_result,
42)
43from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow, WmsRunReport, WmsStates
45_LOG = logging.getLogger(__name__)
class PanDAService(BaseWmsService):
    """PanDA version of WMS service.

    All communication with PanDA goes through the iDDS client obtained
    from ``get_idds_client(self.config)``; each call's raw return value is
    decoded with ``get_idds_result`` into ``(status, result, error)``.
    """

    def prepare(self, config, generic_workflow, out_prefix=None):
        # Docstring inherited from BaseWmsService.prepare.
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def submit(self, workflow):
        # Docstring inherited from BaseWmsService.submit.
        _, max_copy_workers = self.config.search(
            "maxCopyWorkers", opt={"default": PANDA_DEFAULT_MAX_COPY_WORKERS}
        )
        # Copy the files collected while building the workflow to the
        # configured distribution end point before submitting.
        copy_files_for_distribution(
            workflow.files_to_pre_stage, self.config["fileDistributionEndPoint"], max_copy_workers
        )

        idds_client = get_idds_client(self.config)
        ret = idds_client.submit(workflow.idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", ret)

        # Check submission success
        status, result, error = get_idds_result(ret)
        if status:
            request_id = int(result)
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {error}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        # The iDDS request id doubles as the WMS run id.
        workflow.run_id = request_id

    def restart(self, wms_workflow_id):
        # Docstring inherited from BaseWmsService.restart.
        idds_client = get_idds_client(self.config)
        ret = idds_client.retry(request_id=wms_workflow_id)
        _LOG.debug("Restart PanDA workflow returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if status:
            _LOG.info("Restarting PanDA workflow %s", result)
            return wms_workflow_id, None, json.dumps(result)

        # Failure is reported through the message, not an exception.
        return None, None, f"Error retry PanDA workflow: {str(error)}"

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        # Docstring inherited from BaseWmsService.report.
        message = ""
        run_reports = []

        if not wms_workflow_id:
            message = "Run summary not implemented yet, use 'bps report --id <workflow_id>' instead"
            return run_reports, message

        idds_client = get_idds_client(self.config)
        ret = idds_client.get_requests(request_id=wms_workflow_id, with_detail=True)
        _LOG.debug("PanDA get workflow status returned = %s", str(ret))

        request_status = ret[0]
        if request_status != 0:
            raise RuntimeError(f"Error to get workflow status: {ret} for id: {wms_workflow_id}")

        tasks = ret[1][1]
        if not tasks:
            message = f"No records found for workflow id '{wms_workflow_id}'. Hint: double check the id"
            return run_reports, message

        # Run-level fields are taken from the first task record.
        head = tasks[0]
        wms_report = WmsRunReport(
            wms_id=str(head["request_id"]),
            operator=head["username"],
            project="",
            campaign="",
            payload="",
            run=head["name"],
            state=WmsStates.UNKNOWN,
            total_number_jobs=0,
            job_state_counts={state: 0 for state in WmsStates},
            job_summary={},
            run_summary="",
        )

        # The status of a task is taken from the first item of state_map.
        # The workflow is in status WmsStates.FAILED when:
        #      All tasks have failed.
        # SubFinished tasks has jobs in
        #      output_processed_files: Finished
        #      output_failed_files: Failed
        #      output_missing_files: Missing
        state_map = {
            "Finished": [WmsStates.SUCCEEDED],
            "SubFinished": [
                WmsStates.SUCCEEDED,
                WmsStates.FAILED,
                WmsStates.PRUNED,
            ],
            "Transforming": [
                WmsStates.RUNNING,
                WmsStates.SUCCEEDED,
                WmsStates.FAILED,
                WmsStates.UNREADY,
                WmsStates.PRUNED,
            ],
            "Failed": [WmsStates.FAILED, WmsStates.PRUNED],
        }

        # Task record field holding the job count for each WmsState.
        file_map = {
            WmsStates.SUCCEEDED: "output_processed_files",
            WmsStates.RUNNING: "output_processing_files",
            WmsStates.FAILED: "output_failed_files",
            WmsStates.UNREADY: "input_new_files",
            WmsStates.PRUNED: "output_missing_files",
        }

        # workflow status to report as SUCCEEDED
        wf_status = ["Finished", "SubFinished", "Transforming"]

        wf_succeed = False
        # Set when a task reports a status that state_map does not cover.
        has_unmapped_status = False

        tasks.sort(key=lambda x: x["transform_workload_id"])

        # The task label is the transform name with "<run>_" removed.
        # A literal replace is used so that regex metacharacters in the
        # run name cannot change what gets stripped.
        run_prefix = wms_report.run + "_"

        # Loop over all tasks data returned by idds_client
        for task in tasks:
            totaljobs = task["output_total_files"]
            wms_report.total_number_jobs += totaljobs
            tasklabel = task["transform_name"].replace(run_prefix, "")
            status = task["transform_status"]["attributes"]["_name_"]

            mapped_states = state_map.get(status)
            if mapped_states is None:
                # Do not abort the whole report on a status this method
                # has no mapping for; the task simply gets zero counts.
                _LOG.warning("Unmapped iDDS status '%s' for task '%s'", status, tasklabel)
                mapped_states = ()
                has_unmapped_status = True

            taskstatus = {}
            # Fill number of jobs in all WmsStates
            for state in WmsStates:
                njobs = 0
                # Only states mapped from this task's iDDS status (and
                # having a count field) contribute jobs.
                if state in mapped_states and state in file_map:
                    count = task[file_map[state]]
                    if count is not None:
                        njobs = count
                    if state == WmsStates.RUNNING:
                        njobs += task["output_new_files"] - task["input_new_files"]
                wms_report.job_state_counts[state] += njobs
                taskstatus[state] = njobs
            wms_report.job_summary[tasklabel] = taskstatus

            # To fill the EXPECTED column
            if wms_report.run_summary:
                wms_report.run_summary += ";"
            wms_report.run_summary += f"{tasklabel}:{str(totaljobs)}"

            # The run state follows the last task (in sorted order) whose
            # status is listed in wf_status.
            if status in wf_status:
                wf_succeed = True
                wms_report.state = state_map[status][0]

        # All tasks have failed, set the workflow FAILED.  When some task
        # had an unmapped status that conclusion cannot be drawn, so the
        # state is left as initialised (UNKNOWN).
        if not wf_succeed and not has_unmapped_status:
            wms_report.state = WmsStates.FAILED

        run_reports.append(wms_report)

        return run_reports, message

    def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
        # Docstring inherited from BaseWmsService.list_submitted_jobs.
        if wms_id is None and user is not None:
            raise RuntimeError(
                "Error to get workflow status report: wms_id is required"
                " and filtering workflows with 'user' is not supported."
            )

        idds_client = get_idds_client(self.config)
        ret = idds_client.get_requests(request_id=wms_id)
        _LOG.debug("PanDA get workflows returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if status:
            req_ids = [req["request_id"] for req in result]
            return req_ids

        raise RuntimeError(f"Error list PanDA workflow requests: {error}")

    def cancel(self, wms_id, pass_thru=None):
        # Docstring inherited from BaseWmsService.cancel.
        idds_client = get_idds_client(self.config)
        ret = idds_client.abort(request_id=wms_id)
        _LOG.debug("Abort PanDA workflow returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if status:
            _LOG.info("Aborting PanDA workflow %s", result)
            return True, json.dumps(result)

        return False, f"Error abort PanDA workflow: {str(error)}"

    def ping(self, pass_thru=None):
        # Docstring inherited from BaseWmsService.ping.
        idds_client = get_idds_client(self.config)
        ret = idds_client.ping()
        _LOG.debug("Ping PanDA service returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if status:
            # A successful call still has to carry an explicit OK status.
            if "Status" in result and result["Status"] == "OK":
                return 0, None

            return -1, f"Error ping PanDA service: {str(result)}"

        return -1, f"Error ping PanDA service: {str(error)}"

    def run_submission_checks(self):
        # Docstring inherited from BaseWmsService.run_submission_checks.
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        # Fail early if the service cannot be reached.
        status, message = self.ping()
        if status != 0:
            raise RuntimeError(message)
class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """Workflow wrapper that holds an iDDS client workflow for PanDA.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Files to copy before submission, keyed src -> dest.
        self.files_to_pre_stage = dict()
        # iDDS client-side workflow that the work units are added to.
        self.idds_client_workflow = IDDS_client_workflow(name=name)

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from BaseWmsWorkflow.from_generic_workflow.
        instance = cls(generic_workflow.name, config)
        idds_workflow = instance.idds_client_workflow

        # Add the regular work units first; remember the sink work and the
        # task count so the final work can be attached after them.
        staged, sink_work, n_tasks = add_idds_work(config, generic_workflow, idds_workflow)
        instance.files_to_pre_stage.update(staged)

        final_staged = add_final_idds_work(
            config, generic_workflow, idds_workflow, sink_work, n_tasks + 1, 1
        )
        instance.files_to_pre_stage.update(final_staged)

        return instance

    def write(self, out_prefix):
        # Docstring inherited from BaseWmsWorkflow.write.
        pickle_path = os.path.join(out_prefix, "panda_workflow.pickle")
        with open(pickle_path, "wb") as stream:
            pickle.dump(self, stream)