Coverage for python/lsst/ctrl/bps/transform.py: 8% (315 statements)

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import copy
import dataclasses
import logging
import math
import os
import re

from lsst.utils.logging import VERBOSE
from lsst.utils.timer import time_this, timeMethod

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowExec,
    GenericWorkflowFile,
    GenericWorkflowJob,
)
from .bps_utils import (
    WhenToSaveQuantumGraphs,
    _create_execution_butler,
    create_job_quantum_graph_filename,
    save_qg_subgraph,
)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset(
    {
        "memory_multiplier",
        "number_of_retries",
        "request_cpus",
        "request_memory",
        "request_memory_max",
    }
)

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset(
    {
        "request_disk",
        "request_walltime",
    }
)

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset(
    {
        "cmdline",
        "cmdvals",
        "environment",
        "pre_cmdline",
        "post_cmdline",
        "profile",
        "attrs",
    }
)

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))
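# Illustrative note (the exact field list comes from
# dataclasses.fields(GenericWorkflowJob)): aggregation attributes such as
# "request_memory" land in _ATTRS_MAX and "request_walltime" in _ATTRS_SUM,
# so anything not listed above, e.g. "label" or "executable", must be
# identical for every quantum in a cluster and falls into _ATTRS_UNIVERSAL.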

_LOG = logging.getLogger(__name__)


@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
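# A minimal usage sketch (hedged: the YAML path and the ClusteredQuantumGraph
# are hypothetical stand-ins for objects normally built earlier in the bps
# submit flow):
#
#     config = BpsConfig("bps_submit.yaml")
#     workflow, wf_config = transform(config, cqgraph, "/path/to/submit")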


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    job of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have the task and file nodes
    # necessary for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create the workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize the workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id}" for node_id in node_ids]))
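    # Illustrative (hypothetical ids): with two tasks in the pipeline, the
    # command values might end up as
    #     {"qgraphId": "1634072432.123", "qgraphNodeId": "3,42"}
    # so the pipetask --init-only job loads one node per task instead of the
    # whole graph.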

    init_workflow.add_job(gwjob)

    # Lookup butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance the command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label.  Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change the qgraph variable to match whether using the run or a per-job
    # qgraph.  Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
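    # Illustrative (hypothetical job named "job1"): an argument string like
    #     "... -b {butlerConfig} -g {qgraphFile_job1} ..."
    # becomes
    #     "... -b <FILE:butlerConfig> -g <FILE:qgraphFile_job1> ..."
    # deferring the choice of concrete URIs to _fill_arguments.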

    # Save dict of other values needed to complete the command line.
    # (Be careful not to replace env variables as they may
    # be different in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(
            cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow, gwjob.arguments, gwjob.cmdvals
        )


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in a job's command line string.

    Parameters
    ----------
    use_shared : `bool`
        Whether a shared filesystem is being used.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume full URI if in command line and told WMS is not
            # responsible for transferring file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share the file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge.  Not future-proof.
                # Temporary fix until there is a job wrapper that pulls files
                # within the job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer.
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)
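    # Illustrative: "<ENV:LSST_RUN_DIR>/out" first becomes "$LSST_RUN_DIR/out",
    # then os.path.expandvars substitutes the submit-side value of that
    # (hypothetical) environment variable.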

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When to create the execution butler; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile(
        "butlerConfig",
        src_uri=butler_config,
        wms_transfer=wms_transfer,
        job_access_remote=job_access_remote,
        job_shared=job_shared,
    )

    return gwfile


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the QuantumGraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        The submission stage at which to save per-job qgraph files
        (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to determine the QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include a
        filename).
    """
    qgraph_gwfile = None
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(
            f"qgraphFile_{gwjob.name}",
            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching the config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
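        # e.g., "request_memory" -> "requestMemory",
        # "number_of_retries" -> "numberOfRetries".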
        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = None

    # If automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and is a positive number greater than 1.0), adjust the number
    # of retries when necessary.  If the memory multiplier is invalid,
    # disable automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None
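    # Illustrative: memoryMultiplier: 2.0 with no numberOfRetries configured
    # yields number_of_retries = DEFAULT_MEM_RETRIES, while e.g. 0.5
    # (ceil(0.5) == 1) resets memory_multiplier to None, disabling automatic
    # memory scaling.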

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # partition() keeps a command line without arguments from
            # raising during unpacking.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug(
            "Handling job %s (job=%s, quantum=%s)",
            attr,
            getattr(gwjob, attr),
            quantum_job_values.get(attr, "MISSING"),
        )
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error(
                    "Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                    "Current cluster value: %s\n"
                    "Quantum value: %s",
                    attr,
                    gwjob.name,
                    quantum_job_values.get("qgraphNodeId", "MISSING"),
                    current_value,
                    quantum_value,
                )
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if
                # memory autoscaling is enabled.  If it is, always use the
                # memory multiplier and the number of retries which come
                # with the quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether memory autoscaling
                # will be enabled (or disabled) depending on the value of
                # its memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]


def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])
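    # Illustrative (hypothetical quanta): clustering two quanta with
    # request_memory values 2048 and 4096 and request_walltime 600 each
    # gives the cluster job request_memory=4096 (max rule) and
    # request_walltime=1200 (sum rule); universal attributes such as
    # "label" must instead agree exactly.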


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has the information needed for the WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files inside the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False, "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Lookup butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save the full run QuantumGraph for use by jobs.
    generic_workflow.add_file(
        GenericWorkflowFile(
            "runQgraphFile",
            src_uri=config["runQgraphFile"],
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    )

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug(
            "cqgraph: name=%s, len=%s, label=%s, ids=%s",
            cluster.name,
            len(cluster.qgraph_node_ids),
            cluster.label,
            cluster.qgraph_node_ids,
        )

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from the cluster or the cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}

        # If no config values have been cached for this cluster label yet.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allowing whenSaveJobQgraph and useLazyCommands per cluster
            # label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in the
                # cluster section.
                cached_job_values[cluster.label].update(
                    _get_job_values(config["cluster"][cluster.label], search_opt, "runQuantumCommand")
                )
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # The criterion for whether to continue searching for a value is
        # whether the current value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}
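        # Illustrative: if the cluster section already set request_memory,
        # it stays; only attributes still evaluating to False (None, 0, "",
        # etc.) are searched again per pipetask label in the loop below.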

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info either common or aggregate for all Quanta in the cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"] = {"curr_pipetask": qnode.taskDef.label}
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(
                    config, search_opt, "runQuantumCommand"
                )

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(
            config, save_qgraph_per_job, gwjob, generic_workflow.get_file("runQgraphFile"), prefix
        )
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(
            sorted([f"{node_id}" for node_id in cluster.qgraph_node_ids])
        )
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write them now while the full graph is in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update(
        {
            "bps_isjob": "True",
            "bps_project": config["project"],
            "bps_campaign": config["campaign"],
            "bps_run": generic_workflow.name,
            "bps_operator": config["operator"],
            "bps_payload": config["payloadName"],
            "bps_runsite": config["computeSite"],
        }
    )

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add the final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create the script and add the command line to the job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from the command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as the special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as a regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["executionButler"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set as env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)
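            # Illustrative round trip (hypothetical command1 value):
            #     "mycmd ${TMPDIR}/work {payloadName}"
            # is protected to "mycmd <BPSTMP:TMPDIR>/work {payloadName}",
            # the formatter fills {payloadName}, and the result is restored
            # to "mycmd ${TMPDIR}/work mypayload" for run-time expansion.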

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"


def add_final_job_as_sink(generic_workflow, final_job):
    """Add the final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as a new sink node depending upon all previous sink nodes.
    """
    # Find the sink nodes of the generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)