Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a generic workflow. 

23""" 

24 

25import logging 

26import os 

27import re 

28import time 

29import dataclasses 

30 

31from . import BpsConfig, GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile, GenericWorkflowExec 

32from .bps_utils import (save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename, 

33 _create_execution_butler) 

34 

35_LOG = logging.getLogger(__name__) 

36 

37 

def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Prefer a name stored on the graph itself; otherwise fall back to the
    # mandatory unique process name from the config.
    workflow_name = clustered_quantum_graph.graph.get("name")
    if workflow_name is None:
        _, workflow_name = config.search("uniqProcName", opt={"required": True})

    # Optionally create the execution butler at this (TRANSFORM) stage.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        start = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - start)

    workflow = create_generic_workflow(config, clustered_quantum_graph, workflow_name, prefix)
    workflow_config = create_generic_workflow_config(config, prefix)
    return workflow, workflow_config

74 

75 

def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    # Site-specific condor settings live under a per-compute-site section.
    condor_key = f".site.{job.compute_site}.profile.condor"
    if condor_key in config:
        for name, value in config[condor_key].items():
            # A leading "+" marks a job attribute; everything else is a
            # profile entry.
            if name.startswith("+"):
                job.attrs[name[1:]] = value
            else:
                job.profile[name] = value

94 

95 

def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow holding the init job(s) and hook it in as a
    # source so it runs before everything already in the workflow.
    init_workflow = create_init_workflow(config, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)

    # Prepend the init summary to the run summary, dropping empty pieces.
    summaries = [init_workflow.run_attrs.get("bps_run_summary", ""),
                 generic_workflow.run_attrs.get("bps_run_summary", "")]
    generic_workflow.run_attrs["bps_run_summary"] = ";".join(part for part in summaries if part)

117 

118 

def create_init_workflow(config, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    lookup_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "replaceVars": False,
                  "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Single job that runs the pipeline initialization (--init-only).
    init_job = GenericWorkflowJob("pipetaskInit")
    init_job.label = "pipetaskInit"

    init_values = _get_job_values(config, lookup_opt, "runQuantumCommand")
    _handle_job_values_universal(init_values, init_job)   # values common to all quanta
    _handle_job_values_aggregate(init_values, init_job)   # summed / maxed resource values

    # The init job always represents exactly one quantum.
    init_job.tags["quanta_summary"] = "pipetaskInit:1"

    # Fold in site attribute and profile values.
    update_job(config, init_job)

    init_workflow.add_job(init_job)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(init_job.name, [qgraph_gwfile, butler_gwfile])
    init_workflow.run_attrs["bps_run_summary"] = init_job.tags["quanta_summary"]
    _enhance_command(config, init_workflow, init_job)

    return init_workflow

170 

171 

def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    lookup_opt = {"curvals": {"curr_pipetask": gwjob.label}, "replaceVars": False,
                  "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Point the qgraph placeholder at the right lookup key: the full-run
    # graph (when per-job graphs are never written, or for pipetaskInit)
    # or a per-job key.  These are lookup keys, not physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if (WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER
            or gwjob.name == "pipetaskInit"):
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Swap each known input/output file placeholder for the special
    # <FILE:...> marker the WMS layer understands.
    all_files = list(generic_workflow.get_job_inputs(gwjob.name)) \
        + list(generic_workflow.get_job_outputs(gwjob.name))
    for gwfile in all_files:
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    lookup_opt["replaceVars"] = True
    for placeholder in re.findall(r"{([^}]+)}", gwjob.arguments):
        if placeholder not in gwjob.cmdvals:
            _, gwjob.cmdvals[placeholder] = config.search(placeholder, opt=lookup_opt)

    # Backwards compatibility: optionally resolve the command line now.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)

221 

222 

def _fill_arguments(config, generic_workflow, arguments, cmdvals):
    """Replace placeholders in command line string in job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    _, use_shared = config.search("bpsUseShared", opt={"default": False})

    # Resolve each <FILE:key> marker to either a bare basename (file will
    # be transferred/staged next to the job) or its full submit-side URI.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        # Same precedence as written originally:
        # (wms_transfer and not use_shared) or (not job_shared).
        staged = (gwfile.wms_transfer and not use_shared) or not gwfile.job_shared
        resolved = os.path.basename(gwfile.src_uri) if staged else gwfile.src_uri
        arguments = arguments.replace(f"<FILE:{file_key}>", resolved)

    # Turn <ENV:NAME> markers into $NAME and expand with submit-side values.
    arguments = os.path.expandvars(re.sub(r"<ENV:([^>]+)>", r"$\1", arguments))

    # Fill the remaining {placeholder} values from cmdvals.
    return arguments.format(**cmdvals)

260 

261 

def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs access the shared repo directly.
        _, butler_config = config.search("butlerConfig")
        transfer, remote_access, shared = False, True, True
    else:
        # Jobs get a transferred copy of the execution butler.
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        transfer, remote_access, shared = True, False, False

    return GenericWorkflowFile("butlerConfig",
                               src_uri=butler_config,
                               wms_transfer=transfer,
                               job_access_remote=remote_access,
                               job_shared=shared)

297 

298 

def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the QuantumGraph the job should use, either
        the full run graph or a per-job subgraph file.
    """
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # Per-job graphs are never written; every job reads the run graph.
        return run_qgraph_file

    # Each job gets its own QuantumGraph file, keyed by job name.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)

334 

335 

def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        Mapping of every `GenericWorkflowJob` field name to the value found
        in the config, or `None` when not found or when the field is
        handled specially elsewhere.
    """
    # Fields set explicitly by callers; never filled from a config search.
    special_values = ['name', 'label', 'cmdline', 'pre_cmdline', 'post_cmdline']

    job_values = {}
    for field in dataclasses.fields(GenericWorkflowJob):
        # Fixed: must compare the field *name*; comparing the Field object
        # itself against strings was always True, so special fields were
        # searched in the config as well.  Keep the key present (as None)
        # so callers can index job_values by every field name.
        if field.name in special_values:
            job_values[field.name] = None
            continue

        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), field.name)
        found, value = config.search(yaml_name, opt=search_opt)
        if not found and '_' in field.name:
            # Just in case someone used snake case:
            found, value = config.search(field.name, opt=search_opt)
        job_values[field.name] = value if found else None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Split into executable and (optional) arguments.
            cmd_parts = cmdline.split(" ", 1)
            job_values["executable"] = cmd_parts[0]
            if len(cmd_parts) > 1:
                job_values["arguments"] = cmd_parts[1]

    return job_values

380 

381 

382def _handle_job_values_universal(quantum_job_values, gwjob): 

383 """Handle job values that must be same value for every PipelineTask in 

384 cluster. 

385 

386 Parameters 

387 ---------- 

388 quantum_job_values : `dict` [`str`, `Any`] 

389 Job values for running single Quantum. 

390 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

391 Generic workflow job in which to store the universal values. 

392 """ 

393 universal_values = ["arguments", "compute_site"] 

394 for key in universal_values: 

395 current_value = getattr(gwjob, key) 

396 if not current_value: 

397 setattr(gwjob, key, quantum_job_values[key]) 

398 elif current_value != quantum_job_values[key]: 

399 _LOG.error("Inconsistent value for %s in " 

400 "Cluster %s Quantum Number %s\n" 

401 "Current cluster value: %s\n" 

402 "Quantum value: %s", 

403 key, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value, 

404 quantum_job_values[key]) 

405 raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.") 

406 

407 # Handle cmdline special 

408 if not gwjob.executable: 

409 gwjob.executable = GenericWorkflowExec(os.path.basename(quantum_job_values['executable']), 

410 quantum_job_values['executable'], False) 

411 elif quantum_job_values['executable'] != gwjob.executable.src_uri: 

412 _LOG.error("Inconsistent value for %s in " 

413 "Cluster %s Quantum Number %s\n" 

414 "Current cluster value: %s\n" 

415 "Quantum value: %s", 

416 key, gwjob.name, quantum_job_values.get("executable", "MISSING"), gwjob.executable.src_uri, 

417 quantum_job_values[key]) 

418 raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.") 

419 

420 

421def _handle_job_values_aggregate(quantum_job_values, gwjob): 

422 """Handle job values that are aggregate of values from PipelineTasks 

423 in QuantumGraph. 

424 

425 Parameters 

426 ---------- 

427 quantum_job_values : `dict` [`str`, `Any`] 

428 Job values for running single Quantum. 

429 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

430 Generic workflow job in which to store the aggregate values. 

431 """ 

432 values_max = ['request_cpus', 'request_memory'] 

433 values_sum = ['request_disk', 'request_walltime'] 

434 

435 for key in values_max: 

436 current_value = getattr(gwjob, key) 

437 if not current_value: 

438 setattr(gwjob, key, quantum_job_values[key]) 

439 else: 

440 setattr(gwjob, key, max(getattr(gwjob, key), quantum_job_values[key])) 

441 

442 for key in values_sum: 

443 current_value = getattr(gwjob, key) 

444 if not current_value: 

445 setattr(gwjob, key, quantum_job_values[key]) 

446 else: 

447 setattr(gwjob, key, getattr(gwjob, key) + quantum_job_values[key]) 

448 

449 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    qgraph = clustered_quanta_graph.graph["qgraph"]
    # Task labels in pipeline order; used to report per-label quanta counts
    # in that same order.
    task_labels = [task.label for task in qgraph.iterTaskGraph()]
    run_label_counts = dict.fromkeys(task_labels, 0)
    # One generic workflow job per cluster node.
    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        # Getting labels in pipeline order.
        label_counts = dict.fromkeys(task_labels, 0)

        # Get job info either common or aggregate for all Quanta in cluster.
        for node_id in data["qgraph_node_ids"]:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[qnode.taskDef.label] += 1

            # Per-quantum config lookups are scoped to the quantum's task
            # label; variable and env-var replacement is deferred.
            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label},
                          "replaceVars": False,
                          "expandEnvVars": False,
                          "replaceEnvVars": True,
                          "required": False}

            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")

            # Handle universal values.
            _handle_job_values_universal(quantum_job_values, gwjob)

            # Handle aggregate values.
            _handle_job_values_aggregate(quantum_job_values, gwjob)

        # Save summary of Quanta in job.
        gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])
        # Save job quanta counts to run
        for key in task_labels:
            run_label_counts[key] += label_counts[key]

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        # Values used to fill the job's command line placeholders.
        gwjob.cmdvals["qgraphId"] = data["qgraph_node_ids"][0].buildId
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         data["qgraph_node_ids"]]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(qgraph, qgraph_gwfile.src_uri, data["qgraph_node_ids"])

    # Save run's Quanta summary
    run_summary = ";".join([f"{k}:{v}" for k, v in run_label_counts.items()])
    generic_workflow.run_attrs["bps_run_summary"] = run_summary

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    # NOTE(review): the second argument is the literal string
    # "{default: False}"; presumably BpsConfig.get parses it as an option
    # spec -- confirm, since a plain dict.get would return that truthy
    # string when the key is missing.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow

569 

570 

def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record the workflow's
    # identity and output location.
    gw_config = BpsConfig(config)
    gw_config["workflowName"] = config["uniqProcName"]
    gw_config["workflowPath"] = prefix
    return gw_config

590 

591 

def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    # No merge job needed if the execution butler is never created or never
    # merged back.
    if when_create.upper() == "NEVER" or when_merge.upper() == "NEVER":
        return

    gwjob = GenericWorkflowJob("mergeExecutionButler")
    gwjob.label = "mergeExecutionButler"

    # Fill any job fields not already set from the executionButler section.
    job_values = _get_job_values(config, search_opt, None)
    for field in dataclasses.fields(GenericWorkflowJob):
        if not getattr(gwjob, field.name) and job_values.get(field.name):
            setattr(gwjob, field.name, job_values[field.name])

    update_job(config, gwjob)

    # Create script and add command line to job.
    gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

    # Any <FILE:...> markers in the command line become job inputs.
    for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
        generic_workflow.add_job_inputs(gwjob.name, generic_workflow.get_file(file_key))

    _enhance_command(config, generic_workflow, gwjob)

    # Put transfer repo job in appropriate location in workflow.
    merge_mode = when_merge.upper()
    if merge_mode == "ALWAYS":
        # Runs as a special final job regardless of workflow outcome.
        generic_workflow.add_final(gwjob)
    elif merge_mode == "SUCCESS":
        # Runs as a regular sink node, i.e. only after everything succeeds.
        add_final_job_as_sink(generic_workflow, gwjob)
    else:
        raise ValueError(f"Invalid value for executionButler.when_merge {when_merge}")

    generic_workflow.run_attrs["bps_run_summary"] += ";mergeExecutionButler:1"

641 

642 

def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Writes ``final_job.bash`` under ``prefix``, one line per configured
    ``.executionButler.commandN`` entry (N = 1, 2, ... until a key is
    missing), and returns an executable + argument string that runs it.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    # Defer all variable/env expansion while pulling command templates.
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # The two positional script arguments become shell variables used
        # by the command lines written below.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Walk commandN keys until one is missing.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    # Make the script executable (rwxr-xr-x).
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"

700 

701 

def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Current sinks are the nodes with no outgoing edges; the final job
    # will depend on all of them.
    sinks = [node for node in generic_workflow.nodes()
             if generic_workflow.out_degree(node) == 0]
    _LOG.debug("gw_sinks = %s", sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(sinks, final_job.name)