Coverage for python/lsst/ctrl/bps/panda/utils.py: 9%

195 statements  

coverage.py v7.4.4, created at 2024-03-30 10:19 +0000

# This file is part of ctrl_bps_panda.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

28"""Utilities for bps PanDA plugin.""" 

29 

30__all__ = [ 

31 "copy_files_for_distribution", 

32 "get_idds_client", 

33 "get_idds_result", 

34 "convert_exec_string_to_hex", 

35 "add_decoder_prefix", 

36] 

37 

38import binascii 

39import concurrent.futures 

40import logging 

41import os 

42 

43import idds.common.utils as idds_utils 

44import pandaclient.idds_api 

45from idds.doma.workflowv2.domapandawork import DomaPanDAWork 

46from idds.workflowv2.workflow import AndCondition 

47from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob 

48from lsst.ctrl.bps.panda.cmd_line_embedder import CommandLineEmbedder 

49from lsst.ctrl.bps.panda.constants import ( 

50 PANDA_DEFAULT_CLOUD, 

51 PANDA_DEFAULT_CORE_COUNT, 

52 PANDA_DEFAULT_MAX_ATTEMPTS, 

53 PANDA_DEFAULT_MAX_JOBS_PER_TASK, 

54 PANDA_DEFAULT_MAX_WALLTIME, 

55 PANDA_DEFAULT_PRIORITY, 

56 PANDA_DEFAULT_PROCESSING_TYPE, 

57 PANDA_DEFAULT_PROD_SOURCE_LABEL, 

58 PANDA_DEFAULT_RSS, 

59 PANDA_DEFAULT_RSS_MAX, 

60 PANDA_DEFAULT_TASK_TYPE, 

61 PANDA_DEFAULT_VO, 

62) 

63from lsst.resources import ResourcePath 

64 

65_LOG = logging.getLogger(__name__) 

66 

67 


def copy_files_for_distribution(files_to_stage, file_distribution_uri, max_copy_workers):
    """Bring locally generated files into the Cloud for further use
    on the edge nodes.

    Parameters
    ----------
    files_to_stage : `dict` [`str`, `str`]
        Files which need to be copied to a workflow staging area.
    file_distribution_uri : `str`
        Path on the storage accessible from the edge nodes, including the
        access protocol and the bucket name, where files are to be placed.
    max_copy_workers : `int`
        Maximum number of workers for copying files.

    Raises
    ------
    RuntimeError
        Raised when there is an error copying files to the distribution point.
    """
    files_to_copy = {}

    # In case there are folders, iterate over their contents.
    for local_pfn in files_to_stage.values():
        folder_name = os.path.basename(os.path.normpath(local_pfn))
        if os.path.isdir(local_pfn):
            files_in_folder = ResourcePath.findFileResources([local_pfn])
            for file in files_in_folder:
                file_name = file.basename()
                files_to_copy[file] = ResourcePath(
                    os.path.join(file_distribution_uri, folder_name, file_name)
                )
        else:
            files_to_copy[ResourcePath(local_pfn)] = ResourcePath(
                os.path.join(file_distribution_uri, folder_name)
            )

    copy_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_copy_workers)
    future_file_copy = []
    for src, trgt in files_to_copy.items():
        _LOG.debug("Staging %s to %s", src, trgt)
        # Force instantiation of the S3 client here to work around the
        # thread-safety issue described in
        # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe
        trgt.exists()
        future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy"))

    for future in concurrent.futures.as_completed(future_file_copy):
        if future.result() is not None:
            raise RuntimeError("Error placing files at the distribution point")
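
# A minimal, illustrative usage sketch (not part of the original module).  The
# paths, the bucket URI, and the helper name _demo_stage_files are hypothetical
# placeholders; only the call signature comes from the function above.
def _demo_stage_files():
    files_to_stage = {
        "butlerConfig": "/tmp/submit/butler.yaml",  # hypothetical local file
        "runQgraphFile": "/tmp/submit/run.qgraph",  # hypothetical local file
    }
    # Copy everything to a hypothetical staging bucket using 4 copy workers.
    copy_files_for_distribution(files_to_stage, "s3://example-bucket/staging", 4)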


def get_idds_client(config):
    """Get the iDDS client.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.

    Returns
    -------
    idds_client : `idds.client.clientmanager.ClientManager`
        The iDDS ClientManager object.
    """
    idds_server = None
    if isinstance(config, BpsConfig):
        _, idds_server = config.search("iddsServer", opt={"default": None})
    elif isinstance(config, dict) and "iddsServer" in config:
        idds_server = config["iddsServer"]
    # If idds_server is None, a default value on the PanDA relay service
    # will be used.
    idds_client = pandaclient.idds_api.get_api(
        idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True
    )
    return idds_client
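
# A minimal, illustrative usage sketch (not part of the original module):
# get_idds_client accepts either a BpsConfig or a plain dict with an
# "iddsServer" entry.  The server URL and helper name below are hypothetical.
def _demo_get_client():
    client = get_idds_client({"iddsServer": "https://idds.example.org:443/idds"})
    return client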


def get_idds_result(ret):
    """Parse the results returned from iDDS.

    Parameters
    ----------
    ret : `tuple` [`int`, `tuple` [`bool`, payload ]]
        The first part, ``ret[0]``, is the status of the PanDA relay service.
        The part ``ret[1][0]`` is the status of the iDDS service.
        The part ``ret[1][1]`` is the returned payload.
        If ``ret[1][0]`` is `False`, ``ret[1][1]`` can be error messages.

    Returns
    -------
    status : `bool`
        The status of the iDDS calls.
    result : `int` or `list` or `dict` or `None`
        The result returned from iDDS. `None` if in an error state.
    error : `str` or `None`
        Error messages. `None` if not in an error state.
    """
    # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html
    if not isinstance(ret, list | tuple) or ret[0] != 0:
        # Something is wrong with the PanDA relay service.
        # The call may not have been delivered to iDDS.
        status = False
        result = None
        error = f"PanDA relay service returns errors: {str(ret)}"
    else:
        if ret[1][0]:
            status = True
            result = ret[1][1]
            error = None
            if isinstance(result, str) and "Authentication no permission" in result:
                status = False
                error = result
                result = None
        else:
            # iDDS returned errors.
            status = False
            result = None
            error = f"iDDS returns errors: {str(ret[1][1])}"
    return status, result, error
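
# A minimal, illustrative sketch (not part of the original module): how the
# nested status tuples from the relay service map onto (status, result, error).
# The payloads shown are made-up placeholders; only the tuple layout comes
# from the function above.
def _demo_parse_results():
    # Relay service OK (0) and iDDS OK (True): payload is returned as result.
    assert get_idds_result((0, (True, {"workflow_status": "Finished"}))) == (
        True,
        {"workflow_status": "Finished"},
        None,
    )
    # Relay service OK but iDDS reported an error: result is None.
    status, result, error = get_idds_result((0, (False, "db connection failed")))
    assert (status, result) == (False, None) and "iDDS returns errors" in error
    # Non-zero relay status: the call may never have reached iDDS.
    status, result, error = get_idds_result((255, None))
    assert status is False and result is None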


def _make_pseudo_filename(config, gwjob):
    """Make the job pseudo filename.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to create the pseudo filename.

    Returns
    -------
    pseudo_filename : `str`
        The pseudo filename for the given job.
    """
    cmd_line_embedder = CommandLineEmbedder(config)
    _, pseudo_filename = cmd_line_embedder.substitute_command_line(
        gwjob.executable.src_uri + " " + gwjob.arguments, gwjob.cmdvals, gwjob.name, []
    )
    return pseudo_filename


def _make_doma_work(config, generic_workflow, gwjob, task_count, task_chunk):
    """Make the DOMA Work object for a PanDA task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job representing the jobs for the PanDA task.
    task_count : `int`
        Count of PanDA tasks used when making unique names.
    task_chunk : `int`
        Chunk count within a PanDA task used when making unique names.

    Returns
    -------
    work : `idds.doma.workflowv2.domapandawork.DomaPanDAWork`
        The client representation of a PanDA task.
    local_pfns : `dict` [`str`, `str`]
        Files which need to be copied to a workflow staging area.
    """
    _LOG.debug("Using gwjob %s to create new PanDA task (gwjob=%s)", gwjob.name, gwjob)
    cvals = {"curr_cluster": gwjob.label}
    _, site = config.search("computeSite", opt={"curvals": cvals, "required": True})
    cvals["curr_site"] = site
    _, processing_type = config.search(
        "processing_type", opt={"curvals": cvals, "default": PANDA_DEFAULT_PROCESSING_TYPE}
    )
    _, task_type = config.search("taskType", opt={"curvals": cvals, "default": PANDA_DEFAULT_TASK_TYPE})
    _, prod_source_label = config.search(
        "prodSourceLabel", opt={"curvals": cvals, "default": PANDA_DEFAULT_PROD_SOURCE_LABEL}
    )
    _, vo = config.search("vo", opt={"curvals": cvals, "default": PANDA_DEFAULT_VO})

    _, file_distribution_end_point = config.search(
        "fileDistributionEndPoint", opt={"curvals": cvals, "default": None}
    )

    _, file_distribution_end_point_default = config.search(
        "fileDistributionEndPointDefault", opt={"curvals": cvals, "default": None}
    )

    task_rss = gwjob.request_memory if gwjob.request_memory else PANDA_DEFAULT_RSS
    task_rss_retry_step = task_rss * gwjob.memory_multiplier if gwjob.memory_multiplier else 0
    task_rss_retry_offset = 0 if task_rss_retry_step else task_rss

    # Assume input files are the same across the task.
    local_pfns = {}
    direct_io_files = set()

    if gwjob.executable.transfer_executable:
        local_pfns["job_executable"] = gwjob.executable.src_uri
        job_executable = f"./{os.path.basename(gwjob.executable.src_uri)}"
    else:
        job_executable = gwjob.executable.src_uri
    cmd_line_embedder = CommandLineEmbedder(config)
    _LOG.debug(
        "job %s inputs = %s, outputs = %s",
        gwjob.name,
        generic_workflow.get_job_inputs(gwjob.name),
        generic_workflow.get_job_outputs(gwjob.name),
    )

    cmd_line, _ = cmd_line_embedder.substitute_command_line(
        job_executable + " " + gwjob.arguments,
        gwjob.cmdvals,
        gwjob.name,
        generic_workflow.get_job_inputs(gwjob.name) + generic_workflow.get_job_outputs(gwjob.name),
    )

    for gwfile in generic_workflow.get_job_inputs(gwjob.name, transfer_only=True):
        local_pfns[gwfile.name] = gwfile.src_uri
        if os.path.isdir(gwfile.src_uri):
            # This is needed to make the isdir function work properly
            # in the ButlerURL instance on the edge node.
            local_pfns[gwfile.name] += "/"

        if gwfile.job_access_remote:
            direct_io_files.add(gwfile.name)

    if not direct_io_files:
        direct_io_files.add("cmdlineplaceholder")

    lsst_temp = "LSST_RUN_TEMP_SPACE"
    if (
        file_distribution_end_point
        and lsst_temp in file_distribution_end_point
        and lsst_temp not in os.environ
    ):
        file_distribution_end_point = file_distribution_end_point_default

    executable = add_decoder_prefix(
        config, cmd_line, file_distribution_end_point, (local_pfns, direct_io_files)
    )
    work = DomaPanDAWork(
        executable=executable,
        primary_input_collection={
            "scope": "pseudo_dataset",
            "name": f"pseudo_input_collection#{str(task_count)}",
        },
        output_collections=[
            {"scope": "pseudo_dataset", "name": f"pseudo_output_collection#{str(task_count)}"}
        ],
        log_collections=[],
        dependency_map=[],
        task_name=f"{generic_workflow.name}_{task_count:02d}_{gwjob.label}_{task_chunk:02d}",
        task_queue=gwjob.queue,
        task_log={
            "destination": "local",
            "value": "log.tgz",
            "dataset": "PandaJob_#{pandaid}/",
            "token": "local",
            "param_type": "log",
            "type": "template",
        },
        encode_command_line=True,
        task_rss=task_rss,
        task_rss_retry_offset=task_rss_retry_offset,
        task_rss_retry_step=task_rss_retry_step,
        task_rss_max=gwjob.request_memory_max if gwjob.request_memory_max else PANDA_DEFAULT_RSS_MAX,
        task_cloud=gwjob.compute_cloud if gwjob.compute_cloud else PANDA_DEFAULT_CLOUD,
        task_site=site,
        task_priority=int(gwjob.priority) if gwjob.priority else PANDA_DEFAULT_PRIORITY,
        core_count=gwjob.request_cpus if gwjob.request_cpus else PANDA_DEFAULT_CORE_COUNT,
        working_group=gwjob.accounting_group,
        processing_type=processing_type,
        task_type=task_type,
        prodSourceLabel=prod_source_label,
        vo=vo,
        maxattempt=gwjob.number_of_retries if gwjob.number_of_retries else PANDA_DEFAULT_MAX_ATTEMPTS,
        maxwalltime=gwjob.request_walltime if gwjob.request_walltime else PANDA_DEFAULT_MAX_WALLTIME,
    )
    return work, local_pfns
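
# A minimal, illustrative sketch (not part of the original module): the memory
# (RSS) retry parameters computed in _make_doma_work, worked through with
# made-up numbers.  Variable values are hypothetical; the expressions mirror
# the code above.
def _demo_rss_settings():
    request_memory = 4096    # MB requested by the job (hypothetical)
    memory_multiplier = 2.0  # hypothetical multiplier enabling memory scaling

    task_rss = request_memory if request_memory else PANDA_DEFAULT_RSS
    task_rss_retry_step = task_rss * memory_multiplier if memory_multiplier else 0
    task_rss_retry_offset = 0 if task_rss_retry_step else task_rss
    # With the multiplier above: task_rss=4096, step=8192.0, offset=0.
    # Without a multiplier: step=0 and offset equals task_rss (4096).
    return task_rss, task_rss_retry_step, task_rss_retry_offset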


def add_final_idds_work(
    config, generic_workflow, idds_client_workflow, dag_sink_work, task_count, task_chunk
):
    """Add the special final PanDA task to the client workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow in which to find the final job.
    idds_client_workflow : `idds.workflowv2.workflow.Workflow`
        The iDDS client representation of the workflow to which the final task
        is added.
    dag_sink_work : `list` [`idds.doma.workflowv2.domapandawork.DomaPanDAWork`]
        The work nodes in the client workflow which have no successors.
    task_count : `int`
        Count of PanDA tasks used when making unique names.
    task_chunk : `int`
        Chunk count within a PanDA task used when making unique names.

    Returns
    -------
    files : `dict` [`str`, `str`]
        Files which need to be copied to a workflow staging area.

    Raises
    ------
    NotImplementedError
        Raised if the final job in the GenericWorkflow is itself a workflow.
    TypeError
        Raised if the final job in the GenericWorkflow is an invalid type.
    """
    files = {}

    # If a final job exists in the generic workflow, create the DAG final job.
    final = generic_workflow.get_final()
    if final:
        if isinstance(final, GenericWorkflow):
            raise NotImplementedError("PanDA plugin does not support a workflow as the final job")

        if not isinstance(final, GenericWorkflowJob):
            raise TypeError(f"Invalid type for GenericWorkflow.get_final() results ({type(final)})")

        dag_final_work, files = _make_doma_work(
            config,
            generic_workflow,
            final,
            task_count,
            task_chunk,
        )
        pseudo_filename = "pure_pseudoinput+qgraphNodeId:+qgraphId:"
        dag_final_work.dependency_map.append(
            {"name": pseudo_filename, "submitted": False, "dependencies": []}
        )
        idds_client_workflow.add_work(dag_final_work)
        conditions = []
        for work in dag_sink_work:
            conditions.append(work.is_terminated)
        and_cond = AndCondition(conditions=conditions, true_works=[dag_final_work])
        idds_client_workflow.add_condition(and_cond)
    else:
        _LOG.debug("No final job in GenericWorkflow")
    return files


def convert_exec_string_to_hex(cmdline):
    """Convert the command line into a hex representation.

    This step is currently needed because large blocks of command line text,
    including special symbols, are passed to the pilot/container.  Hex
    encoding guarantees a one-to-one mapping and bypasses the special-symbol
    stripping performed by the Pilot.

    Parameters
    ----------
    cmdline : `str`
        UTF-8 command line string.

    Returns
    -------
    hex : `str`
        Hex representation of the string.
    """
    return binascii.hexlify(cmdline.encode()).decode("utf-8")
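
# A minimal, illustrative sketch (not part of the original module): the hex
# encoding is a plain binascii round trip.  The command string is a made-up
# example.
def _demo_hex_round_trip():
    encoded = convert_exec_string_to_hex("echo test")
    assert encoded == "6563686f2074657374"
    assert binascii.unhexlify(encoded).decode() == "echo test"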


def add_decoder_prefix(config, cmd_line, distribution_path, files):
    """Compose the command line sent to the pilot from the functional part
    (the actual software being run) and the middleware part (the container
    invocation).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration information.
    cmd_line : `str`
        UTF-8 based functional part of the command line.
    distribution_path : `str`
        URI of the path where all files are located for distribution.
    files : `tuple` [`dict` [`str`, `str`], `list` [`str`]]
        File names needed for a task (copied locally, direct access).

    Returns
    -------
    decoder_prefix : `str`
        Full command line to be executed on the edge node.
    """
    # Manipulate file paths for placement on the command line.
    files_plc_hldr = {}
    for key, pfn in files[0].items():
        if pfn.endswith("/"):
            files_plc_hldr[key] = os.path.basename(pfn[:-1])
            isdir = True
        else:
            files_plc_hldr[key] = os.path.basename(pfn)
            _, extension = os.path.splitext(pfn)
            # Note: os.path.splitext keeps the leading dot in the extension.
            isdir = os.path.isdir(pfn) or (key == "butlerConfig" and extension != ".yaml")
        if isdir:
            # This is needed to make the isdir function work properly
            # in the ButlerURL instance on the edge node.
            files_plc_hldr[key] += "/"
        _LOG.debug("files_plc_hldr[%s] = %s", key, files_plc_hldr[key])

    cmdline_hex = convert_exec_string_to_hex(cmd_line)
    _, runner_command = config.search("runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False})
    runner_command = runner_command.replace("\n", " ")
    decoder_prefix = runner_command.replace(
        "_cmd_line_",
        str(cmdline_hex)
        + " ${IN/L} "
        + distribution_path
        + " "
        + "+".join(f"{k}:{v}" for k, v in files_plc_hldr.items())
        + " "
        + "+".join(files[1]),
    )
    return decoder_prefix
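
# A minimal, illustrative sketch (not part of the original module): how the
# "_cmd_line_" placeholder in runnerCommand is expanded.  The runner command
# template, file names, and distribution path below are hypothetical
# placeholders; the real runnerCommand comes from the BPS configuration.
def _demo_decoder_prefix():
    cmdline_hex = convert_exec_string_to_hex("pipetask run ...")
    files_plc_hldr = {"butlerConfig": "butler_cfg/", "runQgraphFile": "run.qgraph"}
    direct_io_files = ["cmdlineplaceholder"]
    runner_command = "bash ./decoder.sh _cmd_line_"  # hypothetical template
    decoder_prefix = runner_command.replace(
        "_cmd_line_",
        cmdline_hex
        + " ${IN/L} "
        + "s3://example-bucket/staging"
        + " "
        + "+".join(f"{k}:{v}" for k, v in files_plc_hldr.items())
        + " "
        + "+".join(direct_io_files),
    )
    return decoder_prefix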


def add_idds_work(config, generic_workflow, idds_workflow):
    """Convert GenericWorkflowJobs to iDDS work and add them to the iDDS
    workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing jobs to convert.
    idds_workflow : `idds.workflowv2.workflow.Workflow`
        The iDDS workflow to which the converted jobs should be added.

    Returns
    -------
    files_to_pre_stage : `dict` [`str`, `str`]
        Files that need to be copied to the staging area before submission.
    dag_sink_work : `list` [`idds.doma.workflowv2.domapandawork.DomaPanDAWork`]
        The work nodes in the client workflow which have no successors.
    task_count : `int`
        Number of tasks in the iDDS workflow used for unique task names.

    Raises
    ------
    RuntimeError
        Raised if dependency issues cannot be recovered from after a second
        pass through the workflow.
    """
    # Limit the number of jobs in a single PanDA task.
    _, max_jobs_per_task = config.search("maxJobsPerTask", opt={"default": PANDA_DEFAULT_MAX_JOBS_PER_TASK})

    files_to_pre_stage = {}
    dag_sink_work = []  # Workflow sink nodes that need to be connected to the final task.
    job_to_task = {}
    job_to_pseudo_filename = {}
    task_count = 0  # Task number/ID in the iDDS workflow used for unique names.

    # To avoid failing because only a single (optimized) pass is made through
    # the workflow, catch dependency issues and loop through them again later.
    jobs_with_dependency_issues = {}

    # Assume jobs with the same label share config values.
    for job_label in generic_workflow.labels:
        _LOG.debug("job_label = %s", job_label)
        # Add each job with a particular label to a corresponding PanDA task.
        # A PanDA task has a limit on the number of jobs, so break into
        # multiple PanDA tasks if needed.
        job_count = 0  # Number of jobs in the iDDS task used for task chunking.
        task_chunk = 1  # Task chunk number within the job label used for unique names.
        work = None

        # Instead of changing code to make chunks up front and round-robin
        # assign jobs to chunks, for now keep chunk creation in the loop
        # but use knowledge of how many chunks there will be to set a better
        # maximum number of jobs in a chunk for a more even distribution.
        jobs_by_label = generic_workflow.get_jobs_by_label(job_label)
        num_chunks = -(-len(jobs_by_label) // max_jobs_per_task)  # ceil
        max_jobs_per_task_this_label = -(-len(jobs_by_label) // num_chunks)
        _LOG.debug(
            "For job_label = %s, num jobs = %s, num_chunks = %s, max_jobs = %s",
            job_label,
            len(jobs_by_label),
            num_chunks,
            max_jobs_per_task_this_label,
        )
        for gwjob in jobs_by_label:
            job_count += 1
            if job_count > max_jobs_per_task_this_label:
                job_count = 1
                task_chunk += 1

            if job_count == 1:
                # Create a new PanDA task object.
                task_count += 1
                work, files = _make_doma_work(config, generic_workflow, gwjob, task_count, task_chunk)
                files_to_pre_stage.update(files)
                idds_workflow.add_work(work)
                if generic_workflow.out_degree(gwjob.name) == 0:
                    dag_sink_work.append(work)

            pseudo_filename = _make_pseudo_filename(config, gwjob)
            job_to_pseudo_filename[gwjob.name] = pseudo_filename
            job_to_task[gwjob.name] = work.get_work_name()
            deps = []
            missing_deps = False
            for parent_job_name in generic_workflow.predecessors(gwjob.name):
                if parent_job_name not in job_to_task:
                    _LOG.debug("job_to_task.keys() = %s", job_to_task.keys())
                    missing_deps = True
                    break
                else:
                    deps.append(
                        {
                            "task": job_to_task[parent_job_name],
                            "inputname": job_to_pseudo_filename[parent_job_name],
                            "available": False,
                        }
                    )
            if not missing_deps:
                work.dependency_map.append({"name": pseudo_filename, "dependencies": deps})
            else:
                jobs_with_dependency_issues[gwjob.name] = work

    # If there were any issues figuring out dependencies in the earlier loop.
    if jobs_with_dependency_issues:
        _LOG.warning("Could not prepare workflow in single pass. Please notify developers.")
        _LOG.info("Trying to recover...")
        for job_name, work in jobs_with_dependency_issues.items():
            deps = []
            for parent_job_name in generic_workflow.predecessors(job_name):
                if parent_job_name not in job_to_task:
                    _LOG.debug("job_to_task.keys() = %s", job_to_task.keys())
                    raise RuntimeError(
                        f"Could not recover from dependency issues ({job_name} missing {parent_job_name})."
                    )
                deps.append(
                    {
                        "task": job_to_task[parent_job_name],
                        "inputname": job_to_pseudo_filename[parent_job_name],
                        "available": False,
                    }
                )
            pseudo_filename = job_to_pseudo_filename[job_name]
            work.dependency_map.append({"name": pseudo_filename, "dependencies": deps})
        _LOG.info("Successfully recovered.")

    return files_to_pre_stage, dag_sink_work, task_count
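
# A minimal, illustrative sketch (not part of the original module): the
# ceiling-division chunking used in add_idds_work, worked through with made-up
# counts.  The job and task limits below are hypothetical.
def _demo_chunking():
    num_jobs = 2500           # hypothetical number of jobs with one label
    max_jobs_per_task = 1000  # hypothetical maxJobsPerTask setting

    num_chunks = -(-num_jobs // max_jobs_per_task)             # ceil(2500/1000) = 3
    max_jobs_per_task_this_label = -(-num_jobs // num_chunks)  # ceil(2500/3) = 834
    # Instead of chunks of 1000, 1000, 500, the jobs split into 834, 834, 832,
    # giving a more even distribution across the three PanDA task chunks.
    return num_chunks, max_jobs_per_task_this_label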