Coverage for python/lsst/ctrl/bps/panda/utils.py: 9%

192 statements  

coverage.py v7.3.1, created at 2023-10-02 08:17 +0000

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Utilities for bps PanDA plugin.""" 

29 

30__all__ = [ 

31 "copy_files_for_distribution", 

32 "get_idds_client", 

33 "get_idds_result", 

34 "convert_exec_string_to_hex", 

35 "add_decoder_prefix", 

36] 

37 

38import binascii 

39import concurrent.futures 

40import logging 

41import os 

42 

43import idds.common.utils as idds_utils 

44import pandaclient.idds_api 

45from idds.doma.workflowv2.domapandawork import DomaPanDAWork 

46from idds.workflowv2.workflow import AndCondition 

47from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob 

48from lsst.ctrl.bps.panda.cmd_line_embedder import CommandLineEmbedder 

49from lsst.ctrl.bps.panda.constants import ( 

50 PANDA_DEFAULT_CLOUD, 

51 PANDA_DEFAULT_CORE_COUNT, 

52 PANDA_DEFAULT_MAX_ATTEMPTS, 

53 PANDA_DEFAULT_MAX_JOBS_PER_TASK, 

54 PANDA_DEFAULT_MAX_WALLTIME, 

55 PANDA_DEFAULT_PRIORITY, 

56 PANDA_DEFAULT_PROCESSING_TYPE, 

57 PANDA_DEFAULT_PROD_SOURCE_LABEL, 

58 PANDA_DEFAULT_RSS, 

59 PANDA_DEFAULT_TASK_TYPE, 

60 PANDA_DEFAULT_VO, 

61) 

62from lsst.resources import ResourcePath 

63 

64_LOG = logging.getLogger(__name__) 

65 

66 

67def copy_files_for_distribution(files_to_stage, file_distribution_uri, max_copy_workers): 

68 """Bring locally generated files into the cloud for further

69 use on the edge nodes.

70 

71 Parameters 

72 ---------- 

73 files_to_stage : `dict` [`str`, `str`]

74 Files which need to be copied to a workflow staging area. 

75 file_distribution_uri : `str`

76 Path on storage accessible from the edge nodes, including the

77 access protocol and the bucket name where files are placed.

78 max_copy_workers : `int` 

79 Maximum number of workers for copying files. 

80 

81 Raises 

82 ------ 

83 RuntimeError 

84 Raised when there is an error copying files to the distribution point.

85 """ 

86 files_to_copy = {} 

87 

88 # In case there are folders, iterate over their contents.

89 for local_pfn in files_to_stage.values(): 

90 folder_name = os.path.basename(os.path.normpath(local_pfn)) 

91 if os.path.isdir(local_pfn): 

92 files_in_folder = ResourcePath.findFileResources([local_pfn]) 

93 for file in files_in_folder: 

94 file_name = file.basename() 

95 files_to_copy[file] = ResourcePath( 

96 os.path.join(file_distribution_uri, folder_name, file_name) 

97 ) 

98 else: 

99 files_to_copy[ResourcePath(local_pfn)] = ResourcePath( 

100 os.path.join(file_distribution_uri, folder_name) 

101 ) 

102 

103 copy_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_copy_workers) 

104 future_file_copy = [] 

105 for src, trgt in files_to_copy.items(): 

106 _LOG.debug("Staging %s to %s", src, trgt) 

107 # The exists() call instantiates the S3 client up front to work around

108 # https://stackoverflow.com/questions/52820971/is-boto3-client-thread-safe 

109 trgt.exists() 

110 future_file_copy.append(copy_executor.submit(trgt.transfer_from, src, transfer="copy")) 

111 

112 for future in concurrent.futures.as_completed(future_file_copy): 

113 if future.result() is not None: 

114 raise RuntimeError("Error placing files at the distribution point")

115 
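For reference, a minimal usage sketch of this staging helper (not part of the module source); the local paths, bucket URI, and worker count below are hypothetical.

from lsst.ctrl.bps.panda.utils import copy_files_for_distribution

# Hypothetical files to stage: values are local paths (files or directories).
files_to_stage = {
    "butlerConfig": "/tmp/submit/u/user/run1/butler.yaml",
    "qgraphFile": "/tmp/submit/u/user/run1/run1.qgraph",
}
# Hypothetical distribution endpoint (access protocol plus bucket name).
file_distribution_uri = "s3://bucket-name/staging/run1"

# Copies run in a thread pool; a RuntimeError is raised if any copy fails.
copy_files_for_distribution(files_to_stage, file_distribution_uri, max_copy_workers=10)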

116 

117def get_idds_client(config): 

118 """Get the idds client. 

119 

120 Parameters 

121 ---------- 

122 config : `lsst.ctrl.bps.BpsConfig` 

123 BPS configuration. 

124 

125 Returns 

126 ------- 

127 idds_client : `idds.client.clientmanager.ClientManager`

128 iDDS ClientManager object. 

129 """ 

130 idds_server = None 

131 if isinstance(config, BpsConfig): 

132 _, idds_server = config.search("iddsServer", opt={"default": None}) 

133 elif isinstance(config, dict) and "iddsServer" in config: 

134 idds_server = config["iddsServer"] 

135 # if idds_server is None, a default value on the panda relay service 

136 # will be used 

137 idds_client = pandaclient.idds_api.get_api( 

138 idds_utils.json_dumps, idds_host=idds_server, compress=True, manager=True 

139 ) 

140 return idds_client 

141 
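A minimal sketch of calling this helper (not part of the module source). The function accepts either a `BpsConfig` or a plain dict; the server URL below is hypothetical, and when no "iddsServer" value is found the PanDA relay service default is used.

from lsst.ctrl.bps.panda.utils import get_idds_client

# Hypothetical iDDS server; omit the key to fall back to the relay default.
idds_client = get_idds_client({"iddsServer": "https://idds.example.org:443/idds"})

The returned ClientManager responses follow the (relay status, (iDDS status, payload)) convention that get_idds_result below unpacks.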

142 

143def get_idds_result(ret): 

144 """Parse the results returned from iDDS. 

145 

146 Parameters 

147 ---------- 

148 ret : `tuple` of (`int`, (`bool`, payload))

149 ret[0] is the status of the PanDA relay service.

150 ret[1][0] is the status of the iDDS service.

151 ret[1][1] is the returned payload.

152 If ret[1][0] is False, ret[1][1] contains error messages.

153 

154 Returns 

155 ------- 

156 status : `bool`

157 The status of iDDS calls.

158 result : `int` or `list` or `dict`

159 The result returned from iDDS.

160 error : `str`

161 Error messages. 

162 """ 

163 # https://panda-wms.readthedocs.io/en/latest/client/rest_idds.html 

164 if not isinstance(ret, list | tuple) or ret[0] != 0: 

165 # Something wrong with the PanDA relay service. 

166 # The call may not be delivered to iDDS. 

167 status = False 

168 result = None 

169 error = f"PanDA relay service returns errors: {str(ret)}" 

170 else: 

171 if ret[1][0]: 

172 status = True 

173 result = ret[1][1] 

174 error = None 

175 if isinstance(result, str) and "Authentication no permission" in result: 

176 status = False 

177 error = result

178 result = None

179 else: 

180 # iDDS returns errors 

181 status = False 

182 result = None 

183 error = f"iDDS returns errors: {str(ret[1][1])}" 

184 return status, result, error 

185 
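A small illustration of the tuple shapes this parser handles (not part of the module source); the payloads are made up.

from lsst.ctrl.bps.panda.utils import get_idds_result

# Successful call: relay status 0, iDDS status True, payload is the result.
status, result, error = get_idds_result((0, (True, {"requests": [1234]})))
# status is True, result == {"requests": [1234]}, error is None

# iDDS-side failure: relay status 0, iDDS status False, payload is the message.
status, result, error = get_idds_result((0, (False, "no such request")))
# status is False, result is None, error == "iDDS returns errors: no such request"

# Relay-side failure: non-zero relay status.
status, result, error = get_idds_result((255, None))
# status is False, error starts with "PanDA relay service returns errors:"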

186 

187def _make_pseudo_filename(config, gwjob): 

188 """Make the job pseudo filename. 

189 

190 Parameters 

191 ---------- 

192 config : `lsst.ctrl.bps.BpsConfig` 

193 BPS configuration. 

194 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

195 Job for which to create the pseudo filename. 

196 

197 Returns 

198 ------- 

199 pseudo_filename : `str` 

200 The pseudo filename for the given job. 

201 """ 

202 cmd_line_embedder = CommandLineEmbedder(config) 

203 _, pseudo_filename = cmd_line_embedder.substitute_command_line( 

204 gwjob.executable.src_uri + " " + gwjob.arguments, gwjob.cmdvals, gwjob.name, [] 

205 ) 

206 return pseudo_filename 

207 

208 

209def _make_doma_work(config, generic_workflow, gwjob, task_count, task_chunk): 

210 """Make the DOMA Work object for a PanDA task. 

211 

212 Parameters 

213 ---------- 

214 config : `lsst.ctrl.bps.BpsConfig` 

215 BPS configuration.

generic_workflow : `lsst.ctrl.bps.GenericWorkflow`

Generic workflow containing the job.

216 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`

217 Job used as a representative for the jobs in the PanDA task.

218 task_count : `int` 

219 Count of PanDA tasks used when making unique names. 

220 task_chunk : `int` 

221 Chunk number of the PanDA task, used when making unique names.

222 

223 Returns 

224 ------- 

225 work : `idds.doma.workflowv2.domapandawork.DomaPanDAWork` 

226 The client representation of a PanDA task. 

227 local_pfns : `dict` [`str`, `str`] 

228 Files which need to be copied to a workflow staging area. 

229 """ 

230 _LOG.debug("Using gwjob %s to create new PanDA task (gwjob=%s)", gwjob.name, gwjob) 

231 cvals = {"curr_cluster": gwjob.label} 

232 _, site = config.search("computeSite", opt={"curvals": cvals, "required": True}) 

233 cvals["curr_site"] = site 

234 _, processing_type = config.search( 

235 "processing_type", opt={"curvals": cvals, "default": PANDA_DEFAULT_PROCESSING_TYPE} 

236 ) 

237 _, task_type = config.search("taskType", opt={"curvals": cvals, "default": PANDA_DEFAULT_TASK_TYPE}) 

238 _, prod_source_label = config.search( 

239 "prodSourceLabel", opt={"curvals": cvals, "default": PANDA_DEFAULT_PROD_SOURCE_LABEL} 

240 ) 

241 _, vo = config.search("vo", opt={"curvals": cvals, "default": PANDA_DEFAULT_VO}) 

242 

243 _, file_distribution_end_point = config.search( 

244 "fileDistributionEndPoint", opt={"curvals": cvals, "default": None} 

245 ) 

246 

247 _, file_distribution_end_point_default = config.search( 

248 "fileDistributionEndPointDefault", opt={"curvals": cvals, "default": None} 

249 ) 

250 

251 # Assume input files are the same across the task.

252 local_pfns = {} 

253 direct_io_files = set() 

254 

255 if gwjob.executable.transfer_executable: 

256 local_pfns["job_executable"] = gwjob.executable.src_uri 

257 job_executable = f"./{os.path.basename(gwjob.executable.src_uri)}" 

258 else: 

259 job_executable = gwjob.executable.src_uri 

260 cmd_line_embedder = CommandLineEmbedder(config) 

261 _LOG.debug( 

262 "job %s inputs = %s, outputs = %s", 

263 gwjob.name, 

264 generic_workflow.get_job_inputs(gwjob.name), 

265 generic_workflow.get_job_outputs(gwjob.name), 

266 ) 

267 

268 cmd_line, _ = cmd_line_embedder.substitute_command_line( 

269 job_executable + " " + gwjob.arguments, 

270 gwjob.cmdvals, 

271 gwjob.name, 

272 generic_workflow.get_job_inputs(gwjob.name) + generic_workflow.get_job_outputs(gwjob.name), 

273 ) 

274 

275 for gwfile in generic_workflow.get_job_inputs(gwjob.name, transfer_only=True): 

276 local_pfns[gwfile.name] = gwfile.src_uri 

277 if os.path.isdir(gwfile.src_uri): 

278 # This is needed to make the isdir function work properly

279 # in the ButlerURL instance on the edge node.

280 local_pfns[gwfile.name] += "/" 

281 

282 if gwfile.job_access_remote: 

283 direct_io_files.add(gwfile.name) 

284 

285 if not direct_io_files: 

286 direct_io_files.add("cmdlineplaceholder") 

287 

288 lsst_temp = "LSST_RUN_TEMP_SPACE" 

289 if file_distribution_end_point and lsst_temp in file_distribution_end_point and lsst_temp not in os.environ:

290 file_distribution_end_point = file_distribution_end_point_default 

291 

292 executable = add_decoder_prefix( 

293 config, cmd_line, file_distribution_end_point, (local_pfns, direct_io_files) 

294 ) 

295 work = DomaPanDAWork( 

296 executable=executable, 

297 primary_input_collection={ 

298 "scope": "pseudo_dataset", 

299 "name": f"pseudo_input_collection#{str(task_count)}", 

300 }, 

301 output_collections=[ 

302 {"scope": "pseudo_dataset", "name": f"pseudo_output_collection#{str(task_count)}"} 

303 ], 

304 log_collections=[], 

305 dependency_map=[], 

306 task_name=f"{generic_workflow.name}_{task_count:02d}_{gwjob.label}_{task_chunk:02d}", 

307 task_queue=gwjob.queue, 

308 task_log={ 

309 "destination": "local", 

310 "value": "log.tgz", 

311 "dataset": "PandaJob_#{pandaid}/", 

312 "token": "local", 

313 "param_type": "log", 

314 "type": "template", 

315 }, 

316 encode_command_line=True, 

317 task_rss=gwjob.request_memory if gwjob.request_memory else PANDA_DEFAULT_RSS, 

318 task_cloud=gwjob.compute_cloud if gwjob.compute_cloud else PANDA_DEFAULT_CLOUD, 

319 task_site=site, 

320 task_priority=int(gwjob.priority) if gwjob.priority else PANDA_DEFAULT_PRIORITY, 

321 core_count=gwjob.request_cpus if gwjob.request_cpus else PANDA_DEFAULT_CORE_COUNT, 

322 working_group=gwjob.accounting_group, 

323 processing_type=processing_type, 

324 task_type=task_type, 

325 prodSourceLabel=prod_source_label, 

326 vo=vo, 

327 maxattempt=gwjob.number_of_retries if gwjob.number_of_retries else PANDA_DEFAULT_MAX_ATTEMPTS, 

328 maxwalltime=gwjob.request_walltime if gwjob.request_walltime else PANDA_DEFAULT_MAX_WALLTIME, 

329 ) 

330 return work, local_pfns 

331 
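For orientation, a sketch (not part of the module source) of the task naming and default-fallback pattern used in the DomaPanDAWork construction above; the workflow name, label, and default value are hypothetical.

# Hypothetical inputs.
generic_workflow_name = "u_user_pipeline_20231002T0817Z"
task_count, task_chunk, label = 3, 1, "calibrate"

# Task names follow the f-string passed as task_name above.
task_name = f"{generic_workflow_name}_{task_count:02d}_{label}_{task_chunk:02d}"
# -> "u_user_pipeline_20231002T0817Z_03_calibrate_01"

# Per-job values fall back to the PANDA_DEFAULT_* constants when unset.
request_memory = None
DEFAULT_RSS_EXAMPLE = 4096  # placeholder value; see lsst.ctrl.bps.panda.constants
task_rss = request_memory if request_memory else DEFAULT_RSS_EXAMPLE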

332 

333def add_final_idds_work( 

334 config, generic_workflow, idds_client_workflow, dag_sink_work, task_count, task_chunk 

335): 

336 """Add the special final PanDA task to the client workflow. 

337 

338 Parameters 

339 ---------- 

340 config : `lsst.ctrl.bps.BpsConfig` 

341 BPS configuration. 

342 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

343 Generic workflow in which to find the final job. 

344 idds_client_workflow : `idds.workflowv2.workflow.Workflow` 

345 iDDS client representation of the workflow to which the final task 

346 is added. 

347 dag_sink_work : `list` [`idds.doma.workflowv2.domapandawork.DomaPanDAWork`] 

348 The work nodes in the client workflow which have no successors. 

349 task_count : `int` 

350 Count of PanDA tasks used when making unique names. 

351 task_chunk : `int` 

352 Chunk number of the PanDA task, used when making unique names.

353 

354 Returns 

355 ------- 

356 files : `dict` [`str`, `str`] 

357 Files which need to be copied to a workflow staging area. 

358 

359 Raises 

360 ------ 

361 NotImplementedError 

362 Raised if final job in GenericWorkflow is itself a workflow. 

363 TypeError 

364 Raised if final job in GenericWorkflow is invalid type. 

365 """ 

366 files = {} 

367 

368 # If final job exists in generic workflow, create DAG final job 

369 final = generic_workflow.get_final() 

370 if final: 

371 if isinstance(final, GenericWorkflow): 

372 raise NotImplementedError("PanDA plugin does not support a workflow as the final job") 

373 

374 if not isinstance(final, GenericWorkflowJob): 

375 raise TypeError(f"Invalid type for GenericWorkflow.get_final() results ({type(final)})") 

376 

377 dag_final_work, files = _make_doma_work( 

378 config, 

379 generic_workflow, 

380 final, 

381 task_count, 

382 task_chunk, 

383 ) 

384 pseudo_filename = "pure_pseudoinput+qgraphNodeId:+qgraphId:" 

385 dag_final_work.dependency_map.append( 

386 {"name": pseudo_filename, "submitted": False, "dependencies": []} 

387 ) 

388 idds_client_workflow.add_work(dag_final_work) 

389 conditions = [] 

390 for work in dag_sink_work: 

391 conditions.append(work.is_terminated) 

392 and_cond = AndCondition(conditions=conditions, true_works=[dag_final_work]) 

393 idds_client_workflow.add_condition(and_cond) 

394 else: 

395 _LOG.debug("No final job in GenericWorkflow") 

396 return files 

397 

398 

399def convert_exec_string_to_hex(cmdline): 

400 """Convert the command line into hex representation. 

401 

402 This step is currently needed because large blocks of command line

403 text, including special symbols, are passed to the pilot/container.

404 Hex encoding guarantees a one-to-one mapping and protects the command

405 line from the special-symbol stripping performed by the Pilot.

406 

407 Parameters 

408 ---------- 

409 cmdline : `str` 

410 UTF-8 command line string 

411 

412 Returns 

413 ------- 

414 hex : `str` 

415 Hex representation of string 

416 """ 

417 return binascii.hexlify(cmdline.encode()).decode("utf-8") 

418 
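The encoding is a plain binascii hex round trip, so the original command line can be recovered unchanged on the edge node; a quick illustration (not part of the module source):

import binascii

cmdline = "pipetask run -b {butlerConfig} ..."  # any UTF-8 command line
hexed = binascii.hexlify(cmdline.encode()).decode("utf-8")
assert binascii.unhexlify(hexed).decode("utf-8") == cmdline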

419 

420def add_decoder_prefix(config, cmd_line, distribution_path, files): 

421 """Compose the command line sent to the pilot from the functional part 

422 (the actual software being run) and the middleware part (container invocation).

423 

424 Parameters 

425 ---------- 

426 config : `lsst.ctrl.bps.BpsConfig` 

427 Configuration information 

428 cmd_line : `str` 

429 UTF-8 based functional part of the command line 

430 distribution_path : `str` 

431 URI of path where all files are located for distribution 

432 files : `tuple` [`dict` [`str`, `str`], `list` [`str`]] 

433 File names needed for a task (files to copy locally, files for direct access)

434 

435 Returns 

436 ------- 

437 decoder_prefix : `str` 

438 Full command line to be executed on the edge node 

439 """ 

440 # Manipulate file paths for placement on cmdline 

441 files_plc_hldr = {} 

442 for key, pfn in files[0].items(): 

443 if pfn.endswith("/"): 

444 files_plc_hldr[key] = os.path.basename(pfn[:-1]) 

445 isdir = True 

446 else: 

447 files_plc_hldr[key] = os.path.basename(pfn) 

448 _, extension = os.path.splitext(pfn) 

449 isdir = os.path.isdir(pfn) or (key == "butlerConfig" and extension != "yaml") 

450 if isdir: 

451 # This is needed to make the isdir function work properly

452 # in the ButlerURL instance on the edge node.

453 files_plc_hldr[key] += "/" 

454 _LOG.debug("files_plc_hldr[%s] = %s", key, files_plc_hldr[key]) 

455 

456 cmdline_hex = convert_exec_string_to_hex(cmd_line) 

457 _, runner_command = config.search("runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False}) 

458 runner_command = runner_command.replace("\n", " ") 

459 decoder_prefix = runner_command.replace( 

460 "_cmd_line_", 

461 str(cmdline_hex) 

462 + " ${IN/L} " 

463 + distribution_path 

464 + " " 

465 + "+".join(f"{k}:{v}" for k, v in files_plc_hldr.items()) 

466 + " " 

467 + "+".join(files[1]), 

468 ) 

469 return decoder_prefix 

470 
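To make the placeholder substitution above concrete, here is a sketch (not part of the module source) of what happens to a toy runnerCommand; the command, hex string, and file names are hypothetical, not values shipped with the plugin.

# Hypothetical runnerCommand template from the BPS config (normally multi-line).
runner_command = "docker run lsst-image /decoder.sh _cmd_line_"

cmdline_hex = "706970657461736b2072756e"        # binascii.hexlify(b"pipetask run").decode()
distribution_path = "s3://bucket-name/staging"   # hypothetical
files_plc_hldr = {"butlerConfig": "butler.yaml", "qgraphFile": "run1.qgraph"}
direct_io_files = ["cmdlineplaceholder"]

decoder_prefix = runner_command.replace(
    "_cmd_line_",
    cmdline_hex
    + " ${IN/L} "
    + distribution_path
    + " "
    + "+".join(f"{k}:{v}" for k, v in files_plc_hldr.items())
    + " "
    + "+".join(direct_io_files),
)
# -> "docker run lsst-image /decoder.sh 706970657461736b2072756e ${IN/L}
#     s3://bucket-name/staging butlerConfig:butler.yaml+qgraphFile:run1.qgraph
#     cmdlineplaceholder"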

471 

472def add_idds_work(config, generic_workflow, idds_workflow): 

473 """Convert GenericWorkflowJobs to iDDS work and add them to the iDDS 

474 workflow. 

475 

476 Parameters 

477 ---------- 

478 config : `lsst.ctrl.bps.BpsConfig` 

479 BPS configuration 

480 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

481 Generic workflow containing jobs to convert. 

482 idds_workflow : `idds.workflowv2.workflow.Workflow` 

483 iDDS workflow to which the converted jobs should be added. 

484 

485 Returns 

486 ------- 

487 files_to_pre_stage : `dict` [`str`, `str`] 

488 Files that need to be copied to the staging area before submission. 

489 dag_sink_work : `list` [`idds.doma.workflowv2.domapandawork.DomaPanDAWork`] 

490 The work nodes in the client workflow which have no successors. 

491 task_count : `int` 

492 Number of tasks in the iDDS workflow, used for unique task names.

493 

494 Raises 

495 ------ 

496 RuntimeError 

497 Raised if dependency issues cannot be resolved after the pass through the workflow.

498 """ 

499 # Limit number of jobs in single PanDA task 

500 _, max_jobs_per_task = config.search("maxJobsPerTask", opt={"default": PANDA_DEFAULT_MAX_JOBS_PER_TASK}) 

501 

502 files_to_pre_stage = {} 

503 dag_sink_work = [] # Workflow sink nodes that need to be connected to final task 

504 job_to_task = {} 

505 job_to_pseudo_filename = {} 

506 task_count = 0 # Task number/ID in idds workflow used for unique name 

507 

508 # To keep the number of passes through the workflow low without failing outright,

509 # record jobs with dependency issues and resolve them in a later loop.

510 jobs_with_dependency_issues = {} 

511 

512 # Assume jobs with same label share config values 

513 for job_label in generic_workflow.labels: 

514 _LOG.debug("job_label = %s", job_label) 

515 # Add each job with a particular label to a corresponding PanDA task 

516 # A PanDA task has a limit on number of jobs, so break into multiple 

517 # PanDA tasks if needed. 

518 job_count = 0 # Number of jobs in idds task used for task chunking 

519 task_chunk = 1 # Task chunk number within job label used for unique name 

520 work = None 

521 

522 # Instead of changing the code to make chunks up front and round-robin

523 # assign jobs to them, for now keep chunk creation in the loop, but use

524 # the known number of chunks to set a better maximum number of jobs

525 # per chunk for a more even distribution.

526 jobs_by_label = generic_workflow.get_jobs_by_label(job_label) 

527 num_chunks = -(-len(jobs_by_label) // max_jobs_per_task) # ceil 

528 max_jobs_per_task_this_label = -(-len(jobs_by_label) // num_chunks) 

529 _LOG.debug( 

530 "For job_label = %s, num jobs = %s, num_chunks = %s, max_jobs = %s", 

531 job_label, 

532 len(jobs_by_label), 

533 num_chunks, 

534 max_jobs_per_task_this_label, 

535 ) 

536 for gwjob in jobs_by_label: 

537 job_count += 1 

538 if job_count > max_jobs_per_task_this_label: 

539 job_count = 1 

540 task_chunk += 1 

541 

542 if job_count == 1: 

543 # Create new PanDA task object 

544 task_count += 1 

545 work, files = _make_doma_work(config, generic_workflow, gwjob, task_count, task_chunk) 

546 files_to_pre_stage.update(files) 

547 idds_workflow.add_work(work) 

548 if generic_workflow.out_degree(gwjob.name) == 0: 

549 dag_sink_work.append(work) 

550 

551 pseudo_filename = _make_pseudo_filename(config, gwjob) 

552 job_to_pseudo_filename[gwjob.name] = pseudo_filename 

553 job_to_task[gwjob.name] = work.get_work_name() 

554 deps = [] 

555 missing_deps = False 

556 for parent_job_name in generic_workflow.predecessors(gwjob.name): 

557 if parent_job_name not in job_to_task: 

558 _LOG.debug("job_to_task.keys() = %s", job_to_task.keys()) 

559 missing_deps = True 

560 break 

561 else: 

562 deps.append( 

563 { 

564 "task": job_to_task[parent_job_name], 

565 "inputname": job_to_pseudo_filename[parent_job_name], 

566 "available": False, 

567 } 

568 ) 

569 if not missing_deps: 

570 work.dependency_map.append({"name": pseudo_filename, "dependencies": deps}) 

571 else: 

572 jobs_with_dependency_issues[gwjob.name] = work 

573 

574 # If there were any issues figuring out dependencies through earlier loop 

575 if jobs_with_dependency_issues: 

576 _LOG.warning("Could not prepare workflow in single pass. Please notify developers.") 

577 _LOG.info("Trying to recover...") 

578 for job_name, work in jobs_with_dependency_issues.items(): 

579 deps = [] 

580 for parent_job_name in generic_workflow.predecessors(job_name): 

581 if parent_job_name not in job_to_task: 

582 _LOG.debug("job_to_task.keys() = %s", job_to_task.keys()) 

583 raise RuntimeError(

584 f"Could not recover from dependency issues ({job_name} missing {parent_job_name})."

585 )

586 deps.append( 

587 { 

588 "task": job_to_task[parent_job_name], 

589 "inputname": job_to_pseudo_filename[parent_job_name], 

590 "available": False, 

591 } 

592 ) 

593 pseudo_filename = job_to_pseudo_filename[job_name] 

594 work.dependency_map.append({"name": pseudo_filename, "dependencies": deps}) 

595 _LOG.info("Successfully recovered.") 

596 

597 return files_to_pre_stage, dag_sink_work, task_count
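A small worked example (not part of the module source) of the ceiling-division chunking used above, with a hypothetical job count and per-task limit:

max_jobs_per_task = 1000          # e.g. maxJobsPerTask (value hypothetical)
n_jobs = 2500                     # jobs sharing one label

num_chunks = -(-n_jobs // max_jobs_per_task)              # ceil(2500 / 1000) = 3
max_jobs_per_task_this_label = -(-n_jobs // num_chunks)   # ceil(2500 / 3) = 834

# Jobs 1-834 go to chunk 1, 835-1668 to chunk 2, 1669-2500 to chunk 3,
# giving an even 834/834/832 split instead of 1000/1000/500.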