Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 18%

116 statements  

coverage.py v7.2.7, created at 2023-06-02 00:00 -0700

# This file is part of ctrl_bps_panda.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


__all__ = ["PanDAService", "PandaBpsWmsWorkflow"]


import binascii
import concurrent.futures
import logging
import os

import idds.common.utils as idds_utils
import pandaclient.idds_api
from idds.doma.workflowv2.domapandawork import DomaPanDAWork
from idds.workflowv2.workflow import AndCondition
from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator
from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update
from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow
from lsst.resources import ResourcePath

_LOG = logging.getLogger(__name__)


class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert a generic workflow to a PanDA iDDS workflow ready for
        submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            The generic workflow to convert.
        out_prefix : `str`
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config, generic_workflow, out_prefix, f"{self.__class__.__module__}.{self.__class__.__name__}"
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into its hex representation.

        This step is needed because large blocks of command line text,
        including special symbols, are passed to the pilot/container. Hex
        encoding guarantees a one-to-one mapping and lets the command line
        survive the special-symbol stripping performed by the Pilot.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of the string.
        """
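        # For example, "ls" is encoded as "6c73"; the runner command on the
        # edge node is expected to decode the hex back into the original
        # UTF-8 command line before execution.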

        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (container invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of the path where all files are located for distribution.
        files : `tuple` [`dict`, `set`]
            Placeholder-to-filename mapping and the set of files accessed
            via direct IO, as returned by ``copy_files_for_distribution``.

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
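        # The configured runnerCommand is expected to contain a "_cmd_line_"
        # placeholder; it is replaced below with the hex-encoded command line,
        # the ${IN/L} input placeholder, the distribution endpoint, the
        # "+"-joined placeholder:filename pairs, and the "+"-joined names of
        # the direct IO files.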

        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            A single PanDA iDDS workflow to submit.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        DAG_end_work = []
        DAG_final_work = None

        _, processing_type = self.config.search("processing_type", opt={"default": None})
        _, task_type = self.config.search("task_type", opt={"default": "test"})
        _, prod_source_label = self.config.search("prod_source_label", opt={"default": "test"})
        _, vo = self.config.search("vo", opt={"default": "wlcg"})
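        # Create one DomaPanDAWork per generated task, carrying the task's
        # dependency map, queue, and resource requirements (RSS, cores,
        # walltime, priority), with pseudo input/output collection names.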

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
                task_site=task.site,
                task_priority=int(task.priority) if task.priority else 900,
                core_count=task.core_count,
                working_group=task.working_group,
                processing_type=processing_type,
                task_type=task_type,
                prodSourceLabel=task.prod_source_label if task.prod_source_label else prod_source_label,
                vo=vo,
                maxattempt=task.max_attempt,
                maxwalltime=task.max_walltime if task.max_walltime else 90000,
            )

            idds_client_workflow.add_work(work)
            if task.is_final:
                DAG_final_work = work
            if task.is_dag_end:
                DAG_end_work.append(work)
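        # If a final work was flagged, attach an AndCondition so that it runs
        # only after every DAG-end work has terminated.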

        if DAG_final_work:
            conditions = []
            for work in DAG_end_work:
                conditions.append(work.is_terminated)
            and_cond = AndCondition(conditions=conditions, true_works=[DAG_final_work])
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDS with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Bring locally generated files into the cloud for further use
        on the edge nodes.

        Parameters
        ----------
        tasks : `list`
            Tasks whose input files need to be placed for
            distribution.
        file_distribution_uri : `str`
            Path on the storage accessible from the edge nodes, including
            the access protocol and bucket name, where the files are placed.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            The first return value maps file placeholders to file names;
            the second is the set of files that will be accessed directly.
        """
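        # Collect the submission URLs of files not yet delivered to the edge
        # nodes and note which of them are meant to be accessed via direct IO.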

        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over their content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        copy_executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
        future_file_copy = []
        for src, trgt in files_to_copy.items():
            # Instantiate the S3 client explicitly here to work around
            # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
            trgt.exists()
            future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))
        for future in concurrent.futures.as_completed(future_file_copy):
            if future.result() is not None:
                raise RuntimeError("Error placing files at the distribution point")
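        # "cmdlineplaceholder" presumably keeps the "+"-joined direct IO field
        # of the command line (see add_decoder_prefix) from being empty when
        # no files use direct IO.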

        if len(direct_IO_files) == 0:
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # This is needed to make the isdir function work
                # properly in the ButlerURL instance on the edge node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method.

        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to the WMS.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.
        """
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")
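        # panda_auth_update is expected to validate and, if needed, refresh
        # the PanDA authentication token against the configured iDDS server.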

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        panda_auth_update(idds_server, reset=False)


class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single PanDA-based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for the workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        self.generated_tasks = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from parent class
        idds_workflow = cls(generic_workflow.name, config)
        workflow_generator = IDDSWorkflowGenerator(generic_workflow, config)
        idds_workflow.generated_tasks = workflow_generator.define_tasks()
        _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
        return idds_workflow

    def write(self, out_prefix):
        """Not yet implemented."""