Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 14%
147 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-10 07:57 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-10 07:57 +0000
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21"""Interface between generic workflow and PanDA/iDDS workflow system.
22"""
25__all__ = ["PanDAService", "PandaBpsWmsWorkflow"]
28import json
29import logging
30import os
31import pickle
32import re
34from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
35from lsst.ctrl.bps import BaseWmsService, BaseWmsWorkflow, WmsRunReport, WmsStates
36from lsst.ctrl.bps.panda.constants import PANDA_DEFAULT_MAX_COPY_WORKERS
37from lsst.ctrl.bps.panda.utils import (
38 add_final_idds_work,
39 add_idds_work,
40 copy_files_for_distribution,
41 get_idds_client,
42 get_idds_result,
43)
45_LOG = logging.getLogger(__name__)
class PanDAService(BaseWmsService):
    """PanDA version of WMS service.

    Each operation obtains an iDDS client through ``get_idds_client`` using
    ``self.config`` and decodes the client reply either with
    ``get_idds_result`` or, in `report`, by indexing the raw reply.
    """

    def prepare(self, config, generic_workflow, out_prefix=None):
        # Docstring inherited from BaseWmsService.prepare.
        _LOG.debug("out_prefix = '%s'", out_prefix)
        # Fully qualified name of this service class, handed to the workflow
        # factory.
        service_class = f"{self.__class__.__module__}.{self.__class__.__name__}"
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, service_class
        )
        workflow.write(out_prefix)
        return workflow

    def submit(self, workflow):
        # Docstring inherited from BaseWmsService.submit.
        _, max_copy_workers = self.config.search(
            "maxCopyWorkers", opt={"default": PANDA_DEFAULT_MAX_COPY_WORKERS}
        )
        file_distribution_uri = self.config["fileDistributionEndPoint"]
        lsst_temp = "LSST_RUN_TEMP_SPACE"
        # The configured end point mentions LSST_RUN_TEMP_SPACE but that
        # variable is not set in the environment: use the default end point.
        if lsst_temp in file_distribution_uri and lsst_temp not in os.environ:
            file_distribution_uri = self.config["fileDistributionEndPointDefault"]

        copy_files_for_distribution(workflow.files_to_pre_stage, file_distribution_uri, max_copy_workers)

        idds_client = get_idds_client(self.config)
        ret = idds_client.submit(workflow.idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", ret)

        # Check submission success
        status, result, error = get_idds_result(ret)
        if not status:
            raise RuntimeError(f"Error submitting to PanDA service: {error}")
        request_id = int(result)

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    def restart(self, wms_workflow_id):
        # Docstring inherited from BaseWmsService.restart.
        idds_client = get_idds_client(self.config)
        ret = idds_client.retry(request_id=wms_workflow_id)
        _LOG.debug("Restart PanDA workflow returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if not status:
            return None, None, f"Error retry PanDA workflow: {str(error)}"

        _LOG.info("Restarting PanDA workflow %s", result)
        return wms_workflow_id, None, json.dumps(result)

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        # Docstring inherited from BaseWmsService.report.
        message = ""
        run_reports = []

        if not wms_workflow_id:
            message = "Run summary not implemented yet, use 'bps report --id <workflow_id>' instead"
            return run_reports, message

        idds_client = get_idds_client(self.config)
        ret = idds_client.get_requests(request_id=wms_workflow_id, with_detail=True)
        _LOG.debug("PanDA get workflow status returned = %s", str(ret))

        request_status = ret[0]
        if request_status != 0:
            raise RuntimeError(f"Error to get workflow status: {ret} for id: {wms_workflow_id}")

        tasks = ret[1][1]
        if not tasks:
            message = f"No records found for workflow id '{wms_workflow_id}'. Hint: double check the id"
            return run_reports, message

        # Request-level fields are read from the first returned record.
        head = tasks[0]
        wms_report = WmsRunReport(
            wms_id=str(head["request_id"]),
            operator=head["username"],
            project="",
            campaign="",
            payload="",
            run=head["name"],
            state=WmsStates.UNKNOWN,
            total_number_jobs=0,
            job_state_counts={state: 0 for state in WmsStates},
            job_summary={},
            run_summary="",
        )

        # iDDS task status -> WmsStates that jobs of such a task may be in.
        # The status of a task is taken from the first item of its list.
        # The workflow is in status WmsStates.FAILED when all tasks have
        # failed.  SubFinished tasks have jobs in
        #     output_processed_files: Finished
        #     output_failed_files: Failed
        #     output_missing_files: Missing
        state_map = {
            "Finished": [WmsStates.SUCCEEDED],
            "SubFinished": [
                WmsStates.SUCCEEDED,
                WmsStates.FAILED,
                WmsStates.PRUNED,
            ],
            "Transforming": [
                WmsStates.RUNNING,
                WmsStates.SUCCEEDED,
                WmsStates.FAILED,
                WmsStates.UNREADY,
                WmsStates.PRUNED,
            ],
            "Failed": [WmsStates.FAILED, WmsStates.PRUNED],
        }

        # WmsStates -> key of the task record holding the job count.
        file_map = {
            WmsStates.SUCCEEDED: "output_processed_files",
            WmsStates.RUNNING: "output_processing_files",
            WmsStates.FAILED: "output_failed_files",
            WmsStates.UNREADY: "input_new_files",
            WmsStates.PRUNED: "output_missing_files",
        }

        # workflow status to report as SUCCEEDED
        wf_status = ["Finished", "SubFinished", "Transforming"]

        wf_succeed = False

        tasks.sort(key=lambda x: x["transform_workload_id"])

        # The run name is literal text, not a pattern: escape it so regex
        # metacharacters in a run name cannot alter or break the match.
        run_prefix = re.escape(wms_report.run + "_")

        # Loop over all tasks data returned by idds_client
        for task in tasks:
            totaljobs = task["output_total_files"]
            wms_report.total_number_jobs += totaljobs
            tasklabel = re.sub(run_prefix, "", task["transform_name"])
            status = task["transform_status"]["attributes"]["_name_"]

            # A status missing from state_map maps to no WmsStates, so the
            # task reports zero jobs in every state instead of raising
            # KeyError and aborting the whole report.
            mapped_states = state_map.get(status, ())

            # Fill number of jobs in all WmsStates
            taskstatus = {}
            for state in WmsStates:
                njobs = 0
                if state in file_map and state in mapped_states:
                    count = task[file_map[state]]
                    if count is not None:
                        njobs = count
                    if state == WmsStates.RUNNING:
                        njobs += task["output_new_files"] - task["input_new_files"]
                wms_report.job_state_counts[state] += njobs
                taskstatus[state] = njobs
            wms_report.job_summary[tasklabel] = taskstatus

            # To fill the EXPECTED column
            if wms_report.run_summary:
                wms_report.run_summary += ";"
            wms_report.run_summary += f"{tasklabel}:{str(totaljobs)}"

            # Tasks are visited in sorted order, so the workflow state ends
            # up being that of the last task whose status is in wf_status.
            if status in wf_status:
                wf_succeed = True
                wms_report.state = state_map[status][0]

        # All tasks have failed, set the workflow FAILED
        if not wf_succeed:
            wms_report.state = WmsStates.FAILED

        run_reports.append(wms_report)
        return run_reports, message

    def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
        # Docstring inherited from BaseWmsService.list_submitted_jobs.
        if wms_id is None and user is not None:
            raise RuntimeError(
                "Error to get workflow status report: wms_id is required"
                " and filtering workflows with 'user' is not supported."
            )

        idds_client = get_idds_client(self.config)
        ret = idds_client.get_requests(request_id=wms_id)
        _LOG.debug("PanDA get workflows returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if not status:
            raise RuntimeError(f"Error list PanDA workflow requests: {error}")

        return [req["request_id"] for req in result]

    def cancel(self, wms_id, pass_thru=None):
        # Docstring inherited from BaseWmsService.cancel.
        idds_client = get_idds_client(self.config)
        ret = idds_client.abort(request_id=wms_id)
        _LOG.debug("Abort PanDA workflow returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if not status:
            return False, f"Error abort PanDA workflow: {str(error)}"

        _LOG.info("Aborting PanDA workflow %s", result)
        return True, json.dumps(result)

    def ping(self, pass_thru=None):
        # Docstring inherited from BaseWmsService.ping.
        idds_client = get_idds_client(self.config)
        ret = idds_client.ping()
        _LOG.debug("Ping PanDA service returned = %s", ret)

        status, result, error = get_idds_result(ret)
        if not status:
            return -1, f"Error ping PanDA service: {str(error)}"

        # The call went through; the service is healthy only when the reply
        # carries Status == "OK".
        if "Status" in result and result["Status"] == "OK":
            return 0, None

        return -1, f"Error ping PanDA service: {str(result)}"

    def run_submission_checks(self):
        # Docstring inherited from BaseWmsService.run_submission_checks.
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        status, message = self.ping()
        if status != 0:
            raise RuntimeError(message)
class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single Panda based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Mapping of src -> dest for files to pre-stage.
        self.files_to_pre_stage = {}
        # iDDS client workflow object created under the same name.
        self.idds_client_workflow = IDDS_client_workflow(name=name)

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from BaseWmsWorkflow.from_generic_workflow.
        wms_workflow = cls(generic_workflow.name, config)
        idds_workflow = wms_workflow.idds_client_workflow

        # Add the regular work first and remember the files it needs staged.
        staged, dag_sink_work, task_count = add_idds_work(config, generic_workflow, idds_workflow)
        wms_workflow.files_to_pre_stage.update(staged)

        # Then add the final work, passing the sink work and the next task
        # number returned by the previous step.
        staged = add_final_idds_work(
            config, generic_workflow, idds_workflow, dag_sink_work, task_count + 1, 1
        )
        wms_workflow.files_to_pre_stage.update(staged)

        return wms_workflow

    def write(self, out_prefix):
        # Docstring inherited from BaseWmsWorkflow.write.
        # The whole workflow object is pickled into a fixed file name under
        # out_prefix.
        pickle_path = os.path.join(out_prefix, "panda_workflow.pickle")
        with open(pickle_path, "wb") as stream:
            pickle.dump(self, stream)