Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a generic workflow. 

23""" 

24 

25import logging 

26import math 

27import os 

28import re 

29import time 

30import dataclasses 

31 

32from . import ( 

33 DEFAULT_MEM_RETRIES, 

34 BpsConfig, 

35 GenericWorkflow, 

36 GenericWorkflowJob, 

37 GenericWorkflowFile, 

38 GenericWorkflowExec, 

39) 

40from .bps_utils import ( 

41 save_qg_subgraph, 

42 WhenToSaveQuantumGraphs, 

43 create_job_quantum_graph_filename, 

44 _create_execution_butler 

45) 

46 

# All available job attributes (every field of GenericWorkflowJob).
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
})

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific aggregation category
# (e.g. command lines, profiles); they are handled individually elsewhere.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quanta in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

# Module-level logger.
_LOG = logging.getLogger(__name__)

79 

80 

def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Optionally create the execution butler at this (TRANSFORM) stage.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        start = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - start)

    # Prefer the name carried by the clustered graph; fall back to config.
    name = cqgraph.name
    if name is None:
        _, name = config.search("uniqProcName", opt={"required": True})

    workflow = create_generic_workflow(config, cqgraph, name, prefix)
    workflow_config = create_generic_workflow_config(config, prefix)

    return workflow, workflow_config

117 

118 

def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    search_key = f".site.{job.compute_site}.profile.condor"
    if search_key not in config:
        return

    # Entries whose name starts with "+" are job attributes; everything
    # else is treated as a profile setting.
    for name, value in config[search_key].items():
        if name.startswith("+"):
            job.attrs[name[1:]] = value
        else:
            job.profile[name] = value

137 

138 

def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow with the job and file nodes needed to
    # initialize the pipeline execution, then prepend it to the main one.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)

159 

160 

def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Job that executes the pipeline's --init-only step.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick one node id per task (not per quantum!) so the init job can
    # avoid reading the entire quantum graph.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        first_node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(first_node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted(f"{nid.number}" for nid in node_ids))

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow

218 

219 

def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    # Point the qgraph placeholder at either the full-run QuantumGraph or a
    # per-job one.  Note: these are lookup keys, not physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if (WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER
            or gwjob.name == "pipetaskInit"):
        # No per-job graphs (or init job): use the full-run QuantumGraph.
        qgraph_key = "{runQgraphFile}"
    else:
        # Per-job QuantumGraphs need unique file keys.
        qgraph_key = f"{{qgraphFile_{gwjob.name}}}"
    gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", qgraph_key)

    # Swap file placeholders for the special <FILE:...> markers.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete the command line.
    # (Be careful to not replace env variables as they may be different
    # in the compute job.)
    search_opt["replaceVars"] = True
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # Backwards compatibility: eagerly expand the command line when lazy
    # commands are disabled.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)

271 

272 

273def _fill_arguments(config, generic_workflow, arguments, cmdvals): 

274 """Replace placeholders in command line string in job. 

275 

276 Parameters 

277 ---------- 

278 config : `lsst.ctrl.bps.BpsConfig` 

279 Bps configuration. 

280 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

281 Generic workflow containing the job. 

282 arguments : `str` 

283 String containing placeholders. 

284 cmdvals : `dict` [`str`, `Any`] 

285 Any command line values that can be used to replace placeholders. 

286 

287 Returns 

288 ------- 

289 arguments : `str` 

290 Command line with FILE and ENV placeholders replaced. 

291 """ 

292 # Replace file placeholders 

293 _, use_shared = config.search("bpsUseShared", opt={"default": False}) 

294 for file_key in re.findall(r"<FILE:([^>]+)>", arguments): 

295 gwfile = generic_workflow.get_file(file_key) 

296 if gwfile.wms_transfer and not use_shared or not gwfile.job_shared: 

297 uri = os.path.basename(gwfile.src_uri) 

298 else: 

299 uri = gwfile.src_uri 

300 arguments = arguments.replace(f"<FILE:{file_key}>", uri) 

301 

302 # Replace env placeholder with submit-side values 

303 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments) 

304 arguments = os.path.expandvars(arguments) 

305 

306 # Replace remaining vars 

307 arguments = arguments.format(**cmdvals) 

308 

309 return arguments 

310 

311 

def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs access the central repo directly.
        _, butler_config = config.search("butlerConfig")
        transfer_settings = {"wms_transfer": False,
                             "job_access_remote": True,
                             "job_shared": True}
    else:
        # Jobs use a per-run execution butler that must be shipped along.
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        transfer_settings = {"wms_transfer": True,
                             "job_access_remote": False,
                             "job_shared": False}

    return GenericWorkflowFile("butlerConfig", src_uri=butler_config, **transfer_settings)

347 

348 

def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # No per-job QuantumGraphs; every job shares the full-run file.
        return run_qgraph_file

    # Each job gets its own QuantumGraph file with a unique key.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)

384 

385 

def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        found, value = config.search(attr, opt=search_opt)
        # Attributes missing from the config are explicitly set to None.
        job_values[attr] = value if found else None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Use partition() instead of split(" ", 1): a command line
            # without arguments (no space) would make split() raise a
            # ValueError when unpacking.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values

433 

434 

def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    # Delegate to one handler per aggregation rule; each handler picks the
    # attributes it owns out of ``attributes``.
    for handler in (_handle_job_values_universal,
                    _handle_job_values_max,
                    _handle_job_values_sum):
        handler(quantum_job_values, gwjob, attributes)

452 

453 

def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        # Skip attributes that this quantum does not provide.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)
        if not current_value:
            # First quantum to provide this attribute sets it.
            setattr(gwjob, attr, quantum_value)
        elif current_value != quantum_value:
            # A universal attribute must not differ across the cluster.
            _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                       "Current cluster value: %s\n"
                       "Quantum value: %s",
                       attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                       current_value, quantum_value)
            raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")

486 

487 

def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        # Skip attributes that this quantum does not provide.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)

        # Update when the quantum provides a value and the job either has
        # none yet or a smaller one.
        if quantum_value is not None and (current_value is None or current_value < quantum_value):
            setattr(gwjob, attr, quantum_value)

            # When updating memory requirements for a job, check if memory
            # autoscaling is enabled. If it is, always use the memory
            # multiplier and the number of retries which comes with the
            # quantum.
            #
            # Note that as a result, the quantum with the biggest memory
            # requirements will determine whether the memory autoscaling
            # will be enabled (or disabled) depending on the value of its
            # memory multiplier.
            if attr == "request_memory":
                gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                if gwjob.memory_multiplier is not None:
                    gwjob.number_of_retries = quantum_job_values["number_of_retries"]

532 

533 

def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        # Skip attributes that this quantum does not provide.  The
        # universal and max handlers tolerate missing keys the same way;
        # previously a missing key raised KeyError here.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_value)
        else:
            setattr(gwjob, attr, current_value + quantum_value)

553 

554 

def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # One generic workflow job is created per cluster.
    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config
        search_opt = {"curvals": {},
                      "replaceVars": False,
                      "expandEnvVars": False,
                      "replaceEnvVars": True,
                      "required": False}

        # If some config values are set for this cluster
        if cluster.label in config["cluster"]:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cluster_job_values = _get_job_values(config["cluster"][cluster.label], search_opt,
                                                 "runQuantumCommand")
        else:
            cluster_job_values = {}

        cluster_job_values['name'] = cluster.name
        cluster_job_values['label'] = cluster.label
        cluster_job_values['quanta_counts'] = cluster.quanta_counts
        cluster_job_values['tags'] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        # Only the keys present in cluster_job_values are handled here.
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of whether to continue searching for a value is whether
        # the value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)
            search_opt['curvals'] = {"curr_pipetask": qnode.taskDef.label}
            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")
            _handle_job_values(quantum_job_values, gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        # Command line values identifying which quanta this job runs.
        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    # NOTE(review): the default passed to get() is the truthy string
    # "{default: False}"; if BpsConfig.get does not parse that notation,
    # a missing "runInit" would evaluate truthy here — confirm intended
    # semantics.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    # Run-level attributes forwarded to the WMS for bookkeeping.
    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow

676 

677 

def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record the workflow's
    # identity and location.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config

697 

698 

def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.

    Raises
    ------
    ValueError
        Raised if ``.executionButler.whenMerge`` has an unsupported value.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create job that merges the execution butler back into the
        # central repo.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        # Fill in any unset job attributes from the executionButler config
        # section.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as special final job that runs regardless of success.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as regular sink node so it only runs on workflow success.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            # Fixed message: the config key is "whenMerge", not
            # "when_merge", so report the name users actually set.
            raise ValueError(f"Invalid value for executionButler.whenMerge {when_merge}")

746 

747 

def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {"replaceVars": False, "replaceEnvVars": False, "expandEnvVars": False,
                  "searchobj": config["executionButler"]}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        # Shell preamble: stop on first error, echo commands.
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # Script arguments become env vars referenced by the commands.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Write each numbered command (command1, command2, ...) until one
        # is missing from the config.
        command_num = 1
        while True:
            found, command = config.search(f".executionButler.command{command_num}", opt=search_opt)
            if not found:
                break

            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Restore the temporary env var placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            command_num += 1
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"

806 

807 

def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Current sinks are the nodes with no outgoing edges; the final job
    # becomes a child of all of them.
    current_sinks = [node for node in generic_workflow
                     if generic_workflow.out_degree(node) == 0]
    _LOG.debug("gw_sinks = %s", current_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(current_sinks, final_job.name)