Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 21%
112 statements
« prev ^ index » next coverage.py v6.4, created at 2022-05-27 11:36 +0000
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
23__all__ = ["PanDAService", "PandaBpsWmsWorkflow"]
26import binascii
27import concurrent.futures
28import logging
29import os
31import idds.common.utils as idds_utils
32import pandaclient.idds_api
33from idds.doma.workflowv2.domapandawork import DomaPanDAWork
34from idds.workflowv2.workflow import AndCondition
35from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
36from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator
37from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update
38from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow
39from lsst.resources import ResourcePath
41_LOG = logging.getLogger(__name__)
class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert a generic workflow to a PanDA iDDS workflow ready for
        submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            The generic workflow to convert.
        out_prefix : `str`, optional
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into hex representation.

        This step is currently involved because large blocks of command lines
        including special symbols are passed to the pilot/container. To make
        sure of the 1 to 1 matching and to pass by the special symbol
        stripping performed by the Pilot we apply the hexing.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of string.
        """
        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (containers
        invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of path where all files are located for distribution.
        files : `tuple` [`dict` [`str`, `str`], `set` [`str`]]
            Placeholder-to-filename mapping and the set of files accessed
            directly, as returned by `copy_files_for_distribution`.

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWorkflow`
            A single PanDA iDDS workflow to submit.

        Raises
        ------
        RuntimeError
            If the iDDS client reports a submission failure.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        DAG_end_work = []
        DAG_final_work = None

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
            )
            idds_client_workflow.add_work(work)
            if task.is_final:
                DAG_final_work = work
            if task.is_dag_end:
                DAG_end_work.append(work)

        if DAG_final_work:
            # Run the final work only after every DAG-end work terminated.
            conditions = []
            for work in DAG_end_work:
                conditions.append(work.is_terminated)
            and_cond = AndCondition(conditions=conditions, true_works=[DAG_final_work])
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Bring locally generated files into the Cloud for further
        use on the edge nodes.

        Parameters
        ----------
        tasks : `list`
            Tasks whose input files need to be placed for distribution.
        file_distribution_uri : `str`
            Path on the edge node accessed storage,
            including access protocol, bucket name to place files.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            First parameter is key value pairs
            of file placeholder - file name.
            Second parameter is set of files which will be directly accessed.

        Raises
        ------
        RuntimeError
            If placing any file at the distribution point failed.
        """
        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over its content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        future_file_copy = []
        # Fix: use the executor as a context manager so the worker threads
        # are always shut down (the pool was previously never closed).
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as copy_executor:
            for src, trgt in files_to_copy.items():

                # S3 clients explicitly instantiate here to overpass this
                # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
                trgt.exists()
                future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))
            for future in concurrent.futures.as_completed(future_file_copy):
                if future.result() is not None:
                    raise RuntimeError("Error of placing files to the distribution point")

        if len(direct_IO_files) == 0:
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # this is needed to make isdir function working
                # properly in ButlerURL instance on the egde node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method
        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to HTCondor.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.
        """
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        panda_auth_update(idds_server, reset=False)
class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single PanDA-based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Populated by from_generic_workflow(); None until tasks are defined.
        self.generated_tasks = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from parent class
        generator = IDDSWorkflowGenerator(generic_workflow, config)
        workflow = cls(generic_workflow.name, config)
        workflow.generated_tasks = generator.define_tasks()
        _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
        return workflow

    def write(self, out_prefix):
        """Not yet implemented"""