Coverage for python/lsst/ctrl/bps/panda/panda_service.py: 21%

112 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-17 02:20 -0700

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22 

23__all__ = ["PanDAService", "PandaBpsWmsWorkflow"] 

24 

25 

26import binascii 

27import concurrent.futures 

28import logging 

29import os 

30 

31import idds.common.utils as idds_utils 

32import pandaclient.idds_api 

33from idds.doma.workflowv2.domapandawork import DomaPanDAWork 

34from idds.workflowv2.workflow import AndCondition 

35from idds.workflowv2.workflow import Workflow as IDDS_client_workflow 

36from lsst.ctrl.bps.panda.idds_tasks import IDDSWorkflowGenerator 

37from lsst.ctrl.bps.panda.panda_auth_utils import panda_auth_update 

38from lsst.ctrl.bps.wms_service import BaseWmsService, BaseWmsWorkflow 

39from lsst.resources import ResourcePath 

40 

41_LOG = logging.getLogger(__name__) 

42 

43 

class PanDAService(BaseWmsService):
    """PanDA version of WMS service."""

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Convert generic workflow to a PanDA iDDS workflow ready for
        submission.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration that includes necessary submit/runtime
            information.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow to be converted.
        out_prefix : `str`, optional
            The root directory into which all WMS-specific files are written.

        Returns
        -------
        workflow : `lsst.ctrl.bps.panda.panda_service.PandaBpsWmsWorkflow`
            PanDA workflow ready to be run.
        """
        _LOG.debug("out_prefix = '%s'", out_prefix)
        workflow = PandaBpsWmsWorkflow.from_generic_workflow(
            config,
            generic_workflow,
            out_prefix,
            f"{self.__class__.__module__}.{self.__class__.__name__}",
        )
        workflow.write(out_prefix)
        return workflow

    def convert_exec_string_to_hex(self, cmdline):
        """Convert the command line into hex representation.

        This step is currently involved because large blocks of command lines
        including special symbols passed to the pilot/container. To make sure
        the 1 to 1 matching and pass by the special symbol stripping
        performed by the Pilot we applied the hexing.

        Parameters
        ----------
        cmdline : `str`
            UTF-8 command line string.

        Returns
        -------
        hex : `str`
            Hex representation of string.
        """
        return binascii.hexlify(cmdline.encode()).decode("utf-8")

    def add_decoder_prefix(self, cmd_line, distribution_path, files):
        """Compose the command line sent to the pilot from the functional part
        (the actual SW running) and the middleware part (containers
        invocation).

        Parameters
        ----------
        cmd_line : `str`
            UTF-8 based functional part of the command line.
        distribution_path : `str`
            URI of path where all files are located for distribution.
        files : `tuple` [`dict` [`str`, `str`], iterable of `str`]
            Pair produced by `copy_files_for_distribution`: mapping of file
            placeholder to file name, and the collection of names of files
            which will be directly accessed.

        Returns
        -------
        decoder_prefix : `str`
            Full command line to be executed on the edge node.
        """
        cmdline_hex = self.convert_exec_string_to_hex(cmd_line)
        # Env-var expansion is deferred so that the edge node resolves them.
        _, decoder_prefix = self.config.search(
            "runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}
        )
        decoder_prefix = decoder_prefix.replace(
            "_cmd_line_",
            str(cmdline_hex)
            + " ${IN/L} "
            + distribution_path
            + " "
            + "+".join(f"{k}:{v}" for k, v in files[0].items())
            + " "
            + "+".join(files[1]),
        )
        return decoder_prefix

    def submit(self, workflow):
        """Submit a single PanDA iDDS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWorkflow`
            A single PanDA iDDS workflow to submit.

        Raises
        ------
        RuntimeError
            If the iDDS submission request does not succeed.
        """
        idds_client_workflow = IDDS_client_workflow(name=workflow.name)
        files = self.copy_files_for_distribution(
            workflow.generated_tasks, self.config["fileDistributionEndPoint"]
        )
        DAG_end_work = []
        DAG_final_work = None

        for idx, task in enumerate(workflow.generated_tasks):
            work = DomaPanDAWork(
                executable=self.add_decoder_prefix(
                    task.executable, self.config["fileDistributionEndPoint"], files
                ),
                primary_input_collection={
                    "scope": "pseudo_dataset",
                    "name": "pseudo_input_collection#" + str(idx),
                },
                output_collections=[
                    {"scope": "pseudo_dataset", "name": "pseudo_output_collection#" + str(idx)}
                ],
                log_collections=[],
                dependency_map=task.dependencies,
                task_name=task.name,
                task_queue=task.queue,
                task_log={
                    "destination": "local",
                    "value": "log.tgz",
                    "dataset": "PandaJob_#{pandaid}/",
                    "token": "local",
                    "param_type": "log",
                    "type": "template",
                },
                encode_command_line=True,
                task_rss=task.max_rss,
                task_cloud=task.cloud,
            )
            idds_client_workflow.add_work(work)
            if task.is_final:
                DAG_final_work = work
            if task.is_dag_end:
                DAG_end_work.append(work)

        if DAG_final_work:
            # The final work may only start once every DAG-end work has
            # terminated.
            conditions = [work.is_terminated for work in DAG_end_work]
            and_cond = AndCondition(conditions=conditions, true_works=[DAG_final_work])
            idds_client_workflow.add_condition(and_cond)
        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        c = pandaclient.idds_api.get_api(
            idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
        )
        ret = c.submit(idds_client_workflow, username=None, use_dataset_name=False)
        _LOG.debug("iDDS client manager submit returned = %s", str(ret))

        # Check submission success
        # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
        if ret[0] == 0 and ret[1][0]:
            request_id = int(ret[1][-1])
        else:
            raise RuntimeError(f"Error submitting to PanDA service: {str(ret)}")

        _LOG.info("Submitted into iDDs with request id=%s", request_id)
        workflow.run_id = request_id

    @staticmethod
    def copy_files_for_distribution(tasks, file_distribution_uri):
        """Bring locally generated files into Cloud for further
        utilization on the edge nodes.

        Parameters
        ----------
        tasks : `list` of tasks
            Tasks whose input files need to be placed for distribution.
        file_distribution_uri : `str`
            Path on the edge node accessed storage,
            including access protocol, bucket name to place files.

        Returns
        -------
        files_plc_hldr, direct_IO_files : `dict` [`str`, `str`], `set` of `str`
            First parameter is key values pairs
            of file placeholder - file name.
            Second parameter is set of files which will be directly accessed.

        Raises
        ------
        RuntimeError
            If copying files to the distribution point fails.
        """
        local_pfns = {}
        direct_IO_files = set()
        for task in tasks:
            for file in task.files_used_by_task:
                if not file.delivered:
                    local_pfns[file.name] = file.submission_url
                    if file.direct_IO:
                        direct_IO_files.add(file.name)

        files_to_copy = {}

        # In case there are folders we iterate over its content
        for local_pfn in local_pfns.values():
            folder_name = os.path.basename(local_pfn)
            if os.path.isdir(local_pfn):
                files_in_folder = ResourcePath.findFileResources([local_pfn])
                for file in files_in_folder:
                    file_name = file.basename()
                    files_to_copy[file] = ResourcePath(
                        os.path.join(file_distribution_uri, folder_name, file_name)
                    )
            else:
                files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name)
                )

        future_file_copy = []
        # Use the executor as a context manager so its worker threads are
        # always shut down, even if a transfer raises.
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as copy_executor:
            for src, trgt in files_to_copy.items():
                # S3 clients explicitly instantiate here to overpass this
                # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
                trgt.exists()
                future_file_copy.append(
                    copy_executor.submit(trgt.transfer_from, src, transfer="copy")
                )
            for future in concurrent.futures.as_completed(future_file_copy):
                if future.result() is not None:
                    raise RuntimeError("Error of placing files to the distribution point")

        if len(direct_IO_files) == 0:
            # The runner command expects a non-empty direct-IO list; use a
            # placeholder token when no file is directly accessed.
            direct_IO_files.add("cmdlineplaceholder")

        files_plc_hldr = {}
        for file_placeholder, src_path in local_pfns.items():
            files_plc_hldr[file_placeholder] = os.path.basename(src_path)
            if os.path.isdir(src_path):
                # this is needed to make isdir function working
                # properly in ButlerURL instance on the egde node
                files_plc_hldr[file_placeholder] += "/"

        return files_plc_hldr, direct_IO_files

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Stub for future implementation of the report method
        Expected to return run information based upon given constraints.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`
            Limit to specific run based on id.
        user : `str`
            Limit results to runs for this user.
        hist : `float`
            Limit history search to this many days.
        pass_thru : `str`
            Constraints to pass through to HTCondor.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

        Returns
        -------
        runs : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Information about runs from given job information.
        message : `str`
            Extra message for report command to print. This could be
            pointers to documentation or to WMS specific commands.
        """
        message = ""
        run_reports = None
        return run_reports, message

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        OSError
            If a required environment variable is not set.
        """
        for key in ["PANDA_URL"]:
            if key not in os.environ:
                raise OSError(f"Missing environment variable {key}")

        _, idds_server = self.config.search("iddsServer", opt={"default": None})
        panda_auth_update(idds_server, reset=False)

315 

316 

class PandaBpsWmsWorkflow(BaseWmsWorkflow):
    """A single PanDA-based workflow.

    Parameters
    ----------
    name : `str`
        Unique name for Workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes necessary submit/runtime information.
    """

    def __init__(self, name, config=None):
        super().__init__(name, config)
        # Filled in by from_generic_workflow(); holds the iDDS task list.
        self.generated_tasks = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        # Docstring inherited from parent class
        workflow = cls(generic_workflow.name, config)
        generator = IDDSWorkflowGenerator(generic_workflow, config)
        workflow.generated_tasks = generator.define_tasks()
        _LOG.debug("panda dag attribs %s", generic_workflow.run_attrs)
        return workflow

    def write(self, out_prefix):
        """Not yet implemented"""