Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 20%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

111 statements  

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import binascii 

23import concurrent.futures 

24import logging 

25import os 

26 

27import idds.common.utils as idds_utils 

28import pandaclient.idds_api 

29from idds.doma.workflowv2.domapandawork import DomaPanDAWork 

30from idds.workflowv2.workflow import AndCondition 

31from idds.workflowv2.workflow import Workflow as IDDS_client_workflow 

32from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator 

33from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update 

34from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow 

35from lsst.resources import ResourcePath 

36 

37_LOG = logging.getLogger(__name__) 

38 

39 

class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert generic workflow to a PanDA iDDS workflow ready for
        submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            The generic workflow to convert.
        out_prefix : `str`
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into hex representation.

        This step is currently involved because large blocks of command lines
        including special symbols are passed to the pilot/container. To make
        sure of the 1-to-1 matching and to get past the special-symbol
        stripping performed by the pilot we apply the hexing.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of string.
        """
        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (containers
        invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of path where all files are located for distribution.
        files : `tuple` [`dict` [`str`, `str`], `set` [`str`]]
            Placeholder-to-filename mapping and the collection of directly
            accessed files, as returned by `copy_files_for_distribution`.

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
        # files[0] maps placeholders to file names; files[1] holds the
        # direct-IO file names; both are serialized into the command line.
        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWorkflow`
            A single PanDA iDDS workflow to submit.

        Raises
        ------
        RuntimeError
            Raised if the iDDS server does not accept the submission.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        DAG_end_work = []
        DAG_final_work = None

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
            )
            idds_client_workflow.add_work(work)
            if task.is_final:
                DAG_final_work = work
            if task.is_dag_end:
                DAG_end_work.append(work)

        if DAG_final_work:
            # The final work may run only after every DAG-end work terminates.
            and_cond = AndCondition(
                conditions=[work.is_terminated for work in DAG_end_work],
                true_works=[DAG_final_work],
            )
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Bring locally generated files into Cloud for further
        utilization on the edge nodes.

        Parameters
        ----------
        tasks : `list` of tasks
            Tasks whose input files need to be placed for
            distribution.
        file_distribution_uri : `str`
            Path on the edge node accessed storage,
            including access protocol, bucket name to place files.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            First parameter is key/value pairs
            of file placeholder - file name.
            Second parameter is set of files which will be directly accessed.

        Raises
        ------
        RuntimeError
            Raised if placing a file at the distribution point fails.
        """
        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over its content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        future_file_copy = []
        # Context manager guarantees the executor is shut down even if a
        # copy raises (original leaked the executor's worker threads).
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as copy_executor:
            for src, trgt in files_to_copy.items():

                # S3 clients explicitly instantiate here to overpass this
                # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
                trgt.exists()
                future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))
            for future in concurrent.futures.as_completed(future_file_copy):
                if future.result() is not None:
                    raise RuntimeError("Error of placing files to the distribution point")

        if len(direct_IO_files) == 0:
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # this is needed to make isdir function working
                # properly in ButlerURL instance on the egde node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method.
        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to HTCondor.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.
        """
        for key in ["PANDA_URL", "IDDS_CONFIG"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        panda_auth_update(idds_server, reset=False)

311 

312 

class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single PanDA-based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Populated later by from_generic_workflow().
        self.generated_tasks = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from parent class
        workflow = cls(generic_workflow.name, config)
        workflow.generated_tasks = IDDSWorkflowGenerator(generic_workflow, config).define_tasks()
        _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
        return workflow

    def write(self, out_prefix):
        """Not yet implemented"""