Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 18%
116 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 16:06 +0000
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
23__all__ = ["PanDAService", "PandaBpsWmsWorkflow"]
26import binascii
27import concurrent.futures
28import logging
29import os
31import idds.common.utils as idds_utils
32import pandaclient.idds_api
33from idds.doma.workflowv2.domapandawork import DomaPanDAWork
34from idds.workflowv2.workflow import AndCondition
35from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
36from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator
37from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update
38from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow
39from lsst.resources import ResourcePath
41_LOG = logging.getLogger(__name__)
class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert generic workflow to a PanDA iDDS one ready for submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow to convert.
        out_prefix : `str`, optional
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into hex representation.

        This step is currently involved because large blocks of command lines
        including special symbols passed to the pilot/container. To make sure
        the 1 to 1 matching and pass by the special symbol stripping
        performed by the Pilot we applied the hexing.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of string.
        """
        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (containers
        invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of path where all files are located for distribution.
        files : `tuple` [`dict` [`str`, `str`], `~collections.abc.Iterable` [`str`]]
            Placeholder-to-file-name mapping and the collection of directly
            accessed files (as returned by `copy_files_for_distribution`).

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        # Keep environment variables unexpanded here; they must be resolved
        # on the edge node at run time.
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWorkflow`
            A single PanDA iDDS workflow to submit.

        Raises
        ------
        RuntimeError
            Raised if the iDDS service reports a submission failure.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        DAG_end_work = []
        DAG_final_work = None

        _, processing_type = self.config.search("processing_type", opt={"default": None})
        _, task_type = self.config.search("task_type", opt={"default": "test"})
        _, prod_source_label = self.config.search("prod_source_label", opt={"default": "test"})
        _, vo = self.config.search("vo", opt={"default": "wlcg"})

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
                task_site=task.site,
                task_priority=int(task.priority) if task.priority else 900,
                core_count=task.core_count,
                working_group=task.working_group,
                processing_type=processing_type,
                task_type=task_type,
                prodSourceLabel=task.prod_source_label if task.prod_source_label else prod_source_label,
                vo=vo,
                maxattempt=task.max_attempt,
                maxwalltime=task.max_walltime if task.max_walltime else 90000,
            )

            idds_client_workflow.add_work(work)
            if task.is_final:
                DAG_final_work = work
            if task.is_dag_end:
                DAG_end_work.append(work)

        # The final task (if any) only runs once every DAG-terminal task
        # has terminated.
        if DAG_final_work:
            conditions = []
            for work in DAG_end_work:
                conditions.append(work.is_terminated)
            and_cond = AndCondition(conditions=conditions, true_works=[DAG_final_work])
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Brings locally generated files into Cloud for further
        utilization them on the edge nodes.

        Parameters
        ----------
        tasks : `list` [`task`]
            Tasks that input files needs to be placed for
            distribution.
        file_distribution_uri : `str`
            Path on the edge node accessed storage,
            including access protocol, bucket name to place files.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            First parameters is key values pairs
            of file placeholder - file name
            Second parameter is set of files which will be directly accessed.

        Raises
        ------
        RuntimeError
            Raised if any file copy to the distribution point fails.
        """
        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over its content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        # Use a context manager so the executor's worker threads are always
        # shut down, even if a transfer raises.
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as copy_executor:
            future_file_copy = []
            for src, trgt in files_to_copy.items():
                # S3 clients explicitly instantiate here to overpass this
                # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
                trgt.exists()
                future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))
            for future in concurrent.futures.as_completed(future_file_copy):
                if future.result() is not None:
                    raise RuntimeError("Error of placing files to the distribution point")

        if len(direct_IO_files) == 0:
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # this is needed to make isdir function working
                # properly in ButlerURL instance on the egde node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method
        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to HTCondor.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.
        """
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        panda_auth_update(idds_server, reset=False)
332class PandaBpsWmsWorkflow(BaseWmsWorkflow):
333 """A single Panda based workflow
335 Parameters
336 ----------
337 name : `str`
338 Unique name for Workflow
339 config : `lsst.ctrl.bps.BpsConfig`
340 BPS configuration that includes necessary submit/runtime information
341 """
343 def __init__(self, name, config=None):
344 super().__init__(name, config)
345 self.generated_tasks = None
347 @classmethod
348 def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
349 # Docstring inherited from parent class
350 idds_workflow = cls(generic_workflow.name, config)
351 workflow_generator = IDDSWorkflowGenerator(generic_workflow, config)
352 idds_workflow.generated_tasks = workflow_generator.define_tasks()
353 _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
354 return idds_workflow
356 def write(self, out_prefix):
357 """Not yet implemented"""