Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 20%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import binascii
23import concurrent.futures
24import logging
25import os
27import idds.common.utils as idds_utils
28import pandaclient.idds_api
29from idds.doma.workflowv2.domapandawork import DomaPanDAWork
30from idds.workflowv2.workflow import AndCondition
31from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
32from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator
33from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update
34from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow
35from lsst.resources import ResourcePath
# Module-level logger for this PanDA WMS service plugin.
_LOG = logging.getLogger(__name__)
class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert generic workflow to a PanDA iDDS workflow ready for
        submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow to convert.
        out_prefix : `str`, optional
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into hex representation.

        This step is currently involved because large blocks of command lines
        including special symbols passed to the pilot/container. To make sure
        the 1 to 1 matching and pass by the special symbol stripping
        performed by the Pilot we applied the hexing.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of string.
        """
        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (containers
        invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of path where all files are located for distribution.
        files : `tuple` [`dict` [`str`, `str`], `list` [`str`]]
            Placeholder-to-filename mapping and the collection of files
            needed for a task (the pair returned by
            `copy_files_for_distribution`).

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        # Hex-encode the functional command line so special symbols survive
        # the trip through the pilot (see convert_exec_string_to_hex).
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWorkflow`
            A single PanDA iDDS workflow to submit.

        Raises
        ------
        RuntimeError
            If the iDDS server reports a submission failure.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        dag_end_work = []
        dag_final_work = None

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
            )
            idds_client_workflow.add_work(work)
            if task.is_final:
                dag_final_work = work
            if task.is_dag_end:
                dag_end_work.append(work)

        if dag_final_work:
            # The final task may run only after every DAG-end task terminates.
            and_cond = AndCondition(
                conditions=[work.is_terminated for work in dag_end_work],
                true_works=[dag_final_work],
            )
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Bring locally generated files into Cloud for further
        utilization them on the edge nodes.

        Parameters
        ----------
        tasks : `list`
            Tasks whose input files need to be placed for
            distribution.
        file_distribution_uri : `str`
            Path on the edge node accessed storage,
            including access protocol, bucket name to place files.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            First parameter is key values pairs
            of file placeholder - file name.
            Second parameter is set of files which will be directly accessed.

        Raises
        ------
        RuntimeError
            If any file copy to the distribution point fails.
        """
        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                # NOTE(review): nesting of the direct_IO check under the
                # "not delivered" branch reconstructed from a mangled dump —
                # confirm against upstream.
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over its content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        future_file_copy = []
        # Context manager guarantees worker threads are joined even if a
        # copy (or result retrieval) raises.
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as copy_executor:
            for src, trgt in files_to_copy.items():

                # S3 clients explicitly instantiate here to overpass this
                # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
                trgt.exists()
                future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))
            for future in concurrent.futures.as_completed(future_file_copy):
                if future.result() is not None:
                    raise RuntimeError("Error of placing files to the distribution point")

        if len(direct_IO_files) == 0:
            # Keep the downstream command-line substitution non-empty.
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # this is needed to make isdir function working
                # properly in ButlerURL instance on the egde node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method
        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to HTCondor.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        OSError
            If a required environment variable is not set.
        """
        for key in ["PANDA_URL", "IDDS_CONFIG"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        # Refresh (but do not reset) the PanDA auth token before submitting.
        panda_auth_update(idds_server, reset=False)
class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single Panda based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Populated later by from_generic_workflow().
        self.generated_tasks = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from parent class
        wms_workflow = cls(generic_workflow.name, config)
        generator = IDDSWorkflowGenerator(generic_workflow, config)
        wms_workflow.generated_tasks = generator.define_tasks()
        _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
        return wms_workflow

    def write(self, out_prefix):
        """Not yet implemented"""