Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a generic workflow. 

23""" 

24 

25import logging 

26import math 

27import os 

28import re 

29import time 

30import dataclasses 

31 

32from . import ( 

33 DEFAULT_MEM_RETRIES, 

34 BpsConfig, 

35 GenericWorkflow, 

36 GenericWorkflowJob, 

37 GenericWorkflowFile, 

38 GenericWorkflowExec, 

39) 

40from .bps_utils import ( 

41 save_qg_subgraph, 

42 WhenToSaveQuantumGraphs, 

43 create_job_quantum_graph_filename, 

44 _create_execution_butler 

45) 

46 

# All available job attributes.
# Note: a set comprehension, not set([...]) around a list (C403 idiom fix).
_ATTRS_ALL = {field.name for field in dataclasses.fields(GenericWorkflowJob)}

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = {
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
}

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = {
    "request_disk",
    "request_walltime",
}

# Job attributes that do not fall into a specific aggregation category;
# they are handled explicitly by the code that builds each job.
_ATTRS_MISC = {
    "name",
    "label",
    "tags",
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
}

# Attributes that need to be the same for each quanta in the cluster.
_ATTRS_UNIVERSAL = _ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM)

_LOG = logging.getLogger(__name__)

82 

83 

def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Prefer a name stored on the clustered graph; fall back to the unique
    # process name from the configuration.
    name = clustered_quantum_graph.graph.get("name")
    if name is None:
        _, name = config.search("uniqProcName", opt={"required": True})

    # Optionally create the execution butler during this (TRANSFORM) stage.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        start = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - start)

    workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    workflow_config = create_generic_workflow_config(config, prefix)
    return workflow, workflow_config

120 

121 

def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    section_key = f".site.{job.compute_site}.profile.condor"
    if section_key not in config:
        return

    # Entries prefixed with "+" become job attributes; everything else is
    # a profile setting.
    for name, value in config[section_key].items():
        if name.startswith("+"):
            job.attrs[name[1:]] = value
        else:
            job.profile[name] = value

140 

141 

def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow with the task and file nodes needed to
    # initialize the pipeline execution and prepend it to the main one.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)

    # Prepend the init summary to the run summary, skipping empty pieces.
    summaries = [init_workflow.run_attrs.get("bps_run_summary", ""),
                 generic_workflow.run_attrs.get("bps_run_summary", "")]
    generic_workflow.run_attrs["bps_run_summary"] = ";".join(s for s in summaries if s)

165 

166 

def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    # Search with variable expansion deferred; values are resolved later by
    # _enhance_command / _fill_arguments.
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # create job for executing --init-only
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")

    # Adjust job attributes values if necessary.
    attrs = {"universal": _ATTRS_UNIVERSAL, "max": _ATTRS_MAX, "sum": _ATTRS_SUM}
    _handle_job_values(attrs, job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    # NOTE(review): node numbers are sorted as strings (lexicographic, not
    # numeric) -- confirm downstream consumers do not rely on numeric order.
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))

    # Save summary of Quanta in job.
    gwjob.tags["quanta_summary"] = "pipetaskInit:1"

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    init_workflow.run_attrs["bps_run_summary"] = gwjob.tags["quanta_summary"]
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow

228 

229 

def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Choose which qgraph lookup key the command line references, depending
    # on whether the run uses the full-run or per-job QuantumGraph files.
    # Note: these are lookup keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    use_run_qgraph = (WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER
                      or gwjob.name == "pipetaskInit")
    if use_run_qgraph:
        qgraph_key = "{runQgraphFile}"
    else:
        # Need unique file keys for per-job QuantumGraphs.
        qgraph_key = f"{{qgraphFile_{gwjob.name}}}"
    gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", qgraph_key)

    # Replace file references with special <FILE:...> placeholders,
    # inputs first, then outputs.
    job_files = (list(generic_workflow.get_job_inputs(gwjob.name))
                 + list(generic_workflow.get_job_outputs(gwjob.name)))
    for gwfile in job_files:
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    search_opt["replaceVars"] = True
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # backwards compatibility
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)

279 

280 

281def _fill_arguments(config, generic_workflow, arguments, cmdvals): 

282 """Replace placeholders in command line string in job. 

283 

284 Parameters 

285 ---------- 

286 config : `lsst.ctrl.bps.BpsConfig` 

287 Bps configuration. 

288 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

289 Generic workflow containing the job. 

290 arguments : `str` 

291 String containing placeholders. 

292 cmdvals : `dict` [`str`, `Any`] 

293 Any command line values that can be used to replace placeholders. 

294 

295 Returns 

296 ------- 

297 arguments : `str` 

298 Command line with FILE and ENV placeholders replaced. 

299 """ 

300 # Replace file placeholders 

301 _, use_shared = config.search("bpsUseShared", opt={"default": False}) 

302 for file_key in re.findall(r"<FILE:([^>]+)>", arguments): 

303 gwfile = generic_workflow.get_file(file_key) 

304 if gwfile.wms_transfer and not use_shared or not gwfile.job_shared: 

305 uri = os.path.basename(gwfile.src_uri) 

306 else: 

307 uri = gwfile.src_uri 

308 arguments = arguments.replace(f"<FILE:{file_key}>", uri) 

309 

310 # Replace env placeholder with submit-side values 

311 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments) 

312 arguments = os.path.expandvars(arguments) 

313 

314 # Replace remaining vars 

315 arguments = arguments.format(**cmdvals) 

316 

317 return arguments 

318 

319 

def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs access the shared run butler directly.
        _, butler_config = config.search("butlerConfig")
        transfer_settings = {"wms_transfer": False,
                             "job_access_remote": True,
                             "job_shared": True}
    else:
        # Jobs use a copy of the execution butler transferred alongside them.
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        transfer_settings = {"wms_transfer": True,
                             "job_access_remote": False,
                             "job_shared": False}

    return GenericWorkflowFile("butlerConfig", src_uri=butler_config, **transfer_settings)

355 

356 

def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # Every job reads the single full-run QuantumGraph file.
        return run_qgraph_file

    # Otherwise each job gets its own per-job QuantumGraph file.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)

392 

393 

def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    # Attributes handled specially by callers; never looked up here.
    special_values = ['name', 'label', 'cmdline', 'pre_cmdline', 'post_cmdline']

    job_values = {}
    for field in dataclasses.fields(GenericWorkflowJob):
        if field.name not in special_values:
            # Variable names in yaml are camel case instead of snake case.
            yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), field.name)
            found, value = config.search(yaml_name, opt=search_opt)
            if not found and '_' in field.name:
                # Just in case someone used snake case:
                found, value = config.search(field.name, opt=search_opt)
            job_values[field.name] = value if found else None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Use partition instead of split(" ", 1): a command line that is
            # just an executable with no arguments previously raised
            # ValueError when unpacking the split result.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values

449 

450 

def _handle_job_values(attributes, quantum_job_values, gwjob):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    attributes : `dict` [`str`, Iterable [`str`]]
        Job attributes grouped by category. Supported categories are:

        * ``universal``: job attributes that need to be the same
          in the cluster,
        * ``max``: job attributes that need to be set to their maximal
          values in the cluster,
        * ``sum``: job attributes that need to be sum of their values
          in the cluster.
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    """
    # Dispatch each attribute category to its aggregation handler
    # (order matters: universal first, then max, then sum).
    handlers = (("universal", _handle_job_values_universal),
                ("max", _handle_job_values_max),
                ("sum", _handle_job_values_sum))
    for category, handler in handlers:
        handler(attributes[category], quantum_job_values, gwjob)

477 

478 

479def _handle_job_values_universal(attributes, quantum_job_values, gwjob): 

480 """Handle job attributes that must have the same value for every quantum 

481 in the cluster. 

482 

483 Parameters 

484 ---------- 

485 attributes : Iterable [`str`] 

486 A list of field names which must have a single value for the entire 

487 cluster. 

488 quantum_job_values : `dict` [`str`, Any] 

489 Job values for running single Quantum. 

490 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

491 Generic workflow job in which to store the universal values. 

492 """ 

493 for attr in attributes: 

494 current_value = getattr(gwjob, attr) 

495 try: 

496 quantum_value = quantum_job_values[attr] 

497 except KeyError: 

498 continue 

499 else: 

500 if not current_value: 

501 setattr(gwjob, attr, quantum_value) 

502 elif current_value != quantum_value: 

503 _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n" 

504 "Current cluster value: %s\n" 

505 "Quantum value: %s", 

506 attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value, 

507 quantum_value) 

508 raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.") 

509 

510 

511def _handle_job_values_max(attributes, quantum_job_values, gwjob): 

512 """Handle job attributes that should be set to their maximum value in 

513 the in cluster. 

514 

515 Parameters 

516 ---------- 

517 attributes : Iterable [`str`] 

518 The names of job attributes which needs to be set to their maximum 

519 value in the entire cluster. 

520 quantum_job_values : `dict` [`str`, `Any`] 

521 Job values for running single Quantum. 

522 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

523 Generic workflow job in which to store the aggregate values. 

524 """ 

525 for attr in attributes: 

526 current_value = getattr(gwjob, attr) 

527 try: 

528 quantum_value = quantum_job_values[attr] 

529 except KeyError: 

530 continue 

531 else: 

532 needs_update = False 

533 if current_value is None: 

534 if quantum_value is not None: 

535 needs_update = True 

536 else: 

537 if quantum_value is not None and current_value < quantum_value: 

538 needs_update = True 

539 if needs_update: 

540 setattr(gwjob, attr, quantum_value) 

541 

542 # When updating memory requirements for a job, check if memory 

543 # autoscaling is enabled. If it is, always use the memory 

544 # multiplier and the number of retries which comes with the 

545 # quantum. 

546 # 

547 # Note that as a result, the quantum with the biggest memory 

548 # requirements will determine whether the memory autoscaling 

549 # will be enabled (or disabled) depending on the value of its 

550 # memory multiplier. 

551 if attr == "request_memory": 

552 gwjob.memory_multiplier = quantum_job_values["memory_multiplier"] 

553 if gwjob.memory_multiplier is not None: 

554 gwjob.number_of_retries = quantum_job_values["number_of_retries"] 

555 

556 

557def _handle_job_values_sum(attributes, quantum_job_values, gwjob): 

558 """Handle job attributes that are the sum of their values in the cluster. 

559 

560 Parameters 

561 ---------- 

562 attributes : Iterable [`str`] 

563 The names of job attributes which need to be a sum of the respective 

564 attributes in the entire cluster. 

565 quantum_job_values : `dict` [`str`, `Any`] 

566 Job values for running single Quantum. 

567 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

568 Generic workflow job in which to store the aggregate values. 

569 """ 

570 for attr in attributes: 

571 current_value = getattr(gwjob, attr) 

572 if not current_value: 

573 setattr(gwjob, attr, quantum_job_values[attr]) 

574 else: 

575 setattr(gwjob, attr, current_value + quantum_job_values[attr]) 

576 

577 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    qgraph = clustered_quanta_graph.graph["qgraph"]
    task_labels = [task.label for task in qgraph.iterTaskGraph()]
    # Per-run quanta counts, keyed by task label.
    run_label_counts = dict.fromkeys(task_labels, 0)
    # One generic workflow job per cluster node.
    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]

        # Getting labels in pipeline order.
        label_counts = dict.fromkeys(task_labels, 0)

        # Get job info either common or aggregate for all Quanta in cluster.
        for node_id in data["qgraph_node_ids"]:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[qnode.taskDef.label] += 1

            # Defer variable/env expansion; resolved later per job.
            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label},
                          "replaceVars": False,
                          "expandEnvVars": False,
                          "replaceEnvVars": True,
                          "required": False}
            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")

            # Aggregate this quantum's values into the cluster-level job.
            attrs = {"universal": _ATTRS_UNIVERSAL, "max": _ATTRS_MAX, "sum": _ATTRS_SUM}
            _handle_job_values(attrs, quantum_job_values, gwjob)

        # Save summary of Quanta in job.
        gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])
        # Save job quanta counts to run
        for key in task_labels:
            run_label_counts[key] += label_counts[key]

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        # NOTE(review): node numbers are sorted as strings (lexicographic,
        # not numeric) -- confirm consumers do not rely on numeric order.
        gwjob.cmdvals["qgraphId"] = data["qgraph_node_ids"][0].buildId
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         data["qgraph_node_ids"]]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(qgraph, qgraph_gwfile.src_uri, data["qgraph_node_ids"])

    # Save run's Quanta summary
    run_summary = ";".join([f"{k}:{v}" for k, v in run_label_counts.items()])
    generic_workflow.run_attrs["bps_run_summary"] = run_summary

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    # NOTE(review): the fallback "{default: False}" is a non-empty string and
    # would be truthy if returned as-is; presumably BpsConfig.get parses it
    # as a search-option spec -- confirm against BpsConfig.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow

694 

695 

def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config and record where the
    # workflow lives and what it is called.
    gw_config = BpsConfig(config)
    gw_config["workflowName"] = config["uniqProcName"]
    gw_config["workflowPath"] = prefix
    return gw_config

715 

716 

def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    # A merge job only makes sense when an execution butler is both created
    # and merged back.
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # create gwjob
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        # Only fill in attributes that are still unset (falsy) on the job.
        for field in dataclasses.fields(GenericWorkflowJob):
            if not getattr(gwjob, field.name) and job_values.get(field.name, None):
                setattr(gwjob, field.name, job_values[field.name])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # add as special final job
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # add as regular sink node
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.when_merge {when_merge}")

        generic_workflow.run_attrs["bps_run_summary"] += ";mergeExecutionButler:1"

766 

767 

def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    # Keep variables and env vars unexpanded while reading the raw command
    # strings from the config.
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)  # abort on first failing command
        print("set -x", file=fh)  # echo commands for easier debugging

        # The script takes the original butler repo and the execution butler
        # directory as positional arguments.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Copy numbered commands (command1, command2, ...) until one is
        # missing from the config.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)  # make the script executable
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"

825 

826 

def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Every current sink (node with no outgoing edges) must finish before
    # the final job runs.
    sinks = [job_name for job_name in generic_workflow
             if generic_workflow.out_degree(job_name) == 0]
    _LOG.debug("gw_sinks = %s", sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(sinks, final_job.name)