Coverage for python/lsst/ctrl/bps/transform.py: 8%

335 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-23 10:48 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a generic workflow. 

23""" 

24 

25import copy 

26import dataclasses 

27import logging 

28import math 

29import os 

30import re 

31 

32from lsst.utils.logging import VERBOSE 

33from lsst.utils.timer import time_this, timeMethod 

34 

35from . import ( 

36 DEFAULT_MEM_RETRIES, 

37 BpsConfig, 

38 GenericWorkflow, 

39 GenericWorkflowExec, 

40 GenericWorkflowFile, 

41 GenericWorkflowJob, 

42) 

43from .bps_utils import ( 

44 WhenToSaveQuantumGraphs, 

45 _create_execution_butler, 

46 create_job_quantum_graph_filename, 

47 save_qg_subgraph, 

48) 

49 

# All available job attributes (field names of the GenericWorkflowJob dataclass).
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset(
    {
        "memory_multiplier",
        "number_of_retries",
        "request_cpus",
        "request_memory",
        "request_memory_max",
    }
)

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = frozenset(
    {
        "request_disk",
        "request_walltime",
    }
)

# Job attributes that do not fall into a specific aggregation category:
# 'cmdvals' is built internally (not via config), while 'attrs' and
# 'profile' are set by plugin-specific mechanisms.
_ATTRS_MISC = frozenset(
    {
        "cmdvals",
        "profile",
        "attrs",
    }
)

# Attributes that need to be the same for each quanta in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)

85 

86 

@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Optionally create the execution butler at this stage of submission.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    # Prefer the name carried by the clustered graph; otherwise it is
    # required to be in the config.
    if cqgraph.name is None:
        _, name = config.search("uniqProcName", opt={"required": True})
    else:
        name = cqgraph.name

    workflow = create_generic_workflow(config, cqgraph, name, prefix)
    workflow_config = create_generic_workflow_config(config, prefix)
    return workflow, workflow_config

123 

124 

def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow holding the task/file nodes needed to
    # initialize the pipeline execution, then prepend it as a source.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)

145 

146 

def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    # Search options for config lookups specific to the pipetaskInit job.
    # Env variables are kept as placeholders so they can be resolved on the
    # compute side rather than submit side.
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }
    # Narrow subsequent searches by site/cloud if configured.
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # create job for executing --init-only
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attributes values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Lookup butler values
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    # No cached values to share for this one-off job, hence the empty dict.
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow

216 

217 

def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, dict[`str`, `Any`]]
        Cached values common across jobs with same label. Updated if values
        aren't already saved for given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        # Remove the temporary default so later searches aren't affected.
        del search_opt["default"]

    # Change qgraph variable to match whether using run or per-job qgraph
    # Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Needed unique file keys for per-job QuantumGraphs
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    search_opt["replaceVars"] = True

    # Any remaining {key} placeholders are config lookups; cache per label.
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # backwards compatibility: if not using lazy commands, resolve all
    # placeholders now on the submit side.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(
            cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow, gwjob.arguments, gwjob.cmdvals
        )

295 

296def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals): 

297 """Replace placeholders in command line string in job. 

298 

299 Parameters 

300 ---------- 

301 use_shared : `bool` 

302 Whether using shared filesystem. 

303 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

304 Generic workflow containing the job. 

305 arguments : `str` 

306 String containing placeholders. 

307 cmdvals : `dict` [`str`, `Any`] 

308 Any command line values that can be used to replace placeholders. 

309 

310 Returns 

311 ------- 

312 arguments : `str` 

313 Command line with FILE and ENV placeholders replaced. 

314 """ 

315 # Replace file placeholders 

316 for file_key in re.findall(r"<FILE:([^>]+)>", arguments): 

317 gwfile = generic_workflow.get_file(file_key) 

318 if not gwfile.wms_transfer: 

319 # Must assume full URI if in command line and told WMS is not 

320 # responsible for transferring file. 

321 uri = gwfile.src_uri 

322 elif use_shared: 

323 if gwfile.job_shared: 

324 # Have shared filesystems and jobs can share file. 

325 uri = gwfile.src_uri 

326 else: 

327 # Taking advantage of inside knowledge. Not future-proof. 

328 # Temporary fix until have job wrapper that pulls files 

329 # within job. 

330 if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml": 

331 uri = "butler.yaml" 

332 else: 

333 uri = os.path.basename(gwfile.src_uri) 

334 else: # Using push transfer 

335 uri = os.path.basename(gwfile.src_uri) 

336 

337 arguments = arguments.replace(f"<FILE:{file_key}>", uri) 

338 

339 # Replace env placeholder with submit-side values 

340 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments) 

341 arguments = os.path.expandvars(arguments) 

342 

343 # Replace remaining vars 

344 arguments = arguments.format(**cmdvals) 

345 

346 return arguments 

347 

348 

def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get butler location to be used by job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When to create the execution butler used to determine whether job is
        using execution butler or not.
    butler_config : `str`
        Location of central butler repositories config file.
    execution_butler_dir : `str`
        Location of execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    using_execution_butler = when_create.upper() != "NEVER"
    if using_execution_butler:
        # Point the job at the execution butler repo, anchored at the
        # submit prefix when the path is relative, and let the WMS ship it.
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"

    return GenericWorkflowFile(
        "butlerConfig",
        src_uri=butler_config,
        wms_transfer=using_execution_butler,
        job_access_remote=not using_execution_butler,
        job_shared=not using_execution_butler,
    )

390 

391 

def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job: `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        What submission stage to save per-job qgraph files (or NEVER)
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    # When per-job graphs are never saved, every job reads the full run
    # QuantumGraph instead.
    if save_qgraph_per_job == WhenToSaveQuantumGraphs.NEVER:
        return run_qgraph_file

    return GenericWorkflowFile(
        f"qgraphFile_{gwjob.name}",
        src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
        wms_transfer=True,
        job_access_remote=True,
        job_shared=True,
    )

426 

427 

def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [ `str`, `Any` ]`
        A mapping between job attributes and their values.
    """
    _LOG.debug("cmd_line_key=%s, search_opt=%s", cmd_line_key, search_opt)

    # Create a dummy job to easily access the default values.
    default_gwjob = GenericWorkflowJob("default_job")

    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = getattr(default_gwjob, attr)

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Use partition instead of split so a bare command with no
            # arguments doesn't raise ValueError during unpacking.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values

482 

483 

def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    # Apply each aggregation rule in turn; each handler only touches the
    # attributes belonging to its own category.
    for handler in (
        _handle_job_values_universal,
        _handle_job_values_max,
        _handle_job_values_sum,
    ):
        handler(quantum_job_values, gwjob, attributes)

501 

502 

def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.

    Raises
    ------
    RuntimeError
        Raised if two quanta in the same cluster disagree on an attribute's
        value.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug(
            "Handling job %s (job=%s, quantum=%s)",
            attr,
            getattr(gwjob, attr),
            quantum_job_values.get(attr, "MISSING"),
        )
        # Skip attributes the quantum doesn't provide a value for.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)
        if not current_value:
            # Nothing set yet on the cluster job; take the quantum's value.
            setattr(gwjob, attr, quantum_value)
        elif current_value != quantum_value:
            _LOG.error(
                "Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                "Current cluster value: %s\n"
                "Quantum value: %s",
                attr,
                gwjob.name,
                quantum_job_values.get("qgraphNodeId", "MISSING"),
                current_value,
                quantum_value,
            )
            raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")

544 

545 

def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the in cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTR_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            # Quantum supplies no value for this attribute; leave the
            # cluster job's value alone.
            continue
        else:
            # Update only when the quantum's value is defined and strictly
            # larger than the current cluster value (or no value set yet).
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which comes with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]

590 

591 

def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            # Match the behavior of the universal/max handlers: quietly
            # skip attributes the quantum provides no value for instead of
            # raising KeyError.
            continue
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_value)
        else:
            setattr(gwjob, attr, current_value + quantum_value)

611 

612 

def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False, "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Lookup butler values once
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(
        GenericWorkflowFile(
            "runQgraphFile",
            src_uri=config["runQgraphFile"],
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    )

    # Cache pipetask specific or more generic job values to minimize number
    # on config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug(
            "cqgraph: name=%s, len=%s, label=%s, ids=%s",
            cluster.name,
            len(cluster.qgraph_node_ids),
            cluster.label,
            cluster.qgraph_node_ids,
        )

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config
        search_opt["curvals"] = {"curr_cluster": cluster.label}
        found, value = config.search("computeSite", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_site"] = value
        found, value = config.search("computeCloud", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_cloud"] = value

        # If some config values are set for this cluster
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allowing whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in
                # cluster section.
                cached_job_values[cluster.label].update(
                    _get_job_values(config["cluster"][cluster.label], search_opt, "runQuantumCommand")
                )
        # Shallow copy so the per-cluster additions below don't pollute
        # the cache shared by other clusters with the same label.
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of whether to continue searching for a value is whether
        # the value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"]["curr_pipetask"] = qnode.taskDef.label
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(
                    config, search_opt, "runQuantumCommand"
                )

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(
            config, save_qgraph_per_job, gwjob, generic_workflow.get_file("runQgraphFile"), prefix
        )
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(
            sorted([f"{node_id}" for node_id in cluster.qgraph_node_ids])
        )
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update(
        {
            "bps_isjob": "True",
            "bps_project": config["project"],
            "bps_campaign": config["campaign"],
            "bps_run": generic_workflow.name,
            "bps_operator": config["operator"],
            "bps_payload": config["payloadName"],
            "bps_runsite": config["computeSite"],
        }
    )

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow

780 

781 

def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config and record where the
    # workflow lives and what it is called.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config

801 

802 

def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.

    Raises
    ------
    ValueError
        Raised if executionButler.whenMerge has a value other than
        ALWAYS, SUCCESS, or NEVER.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    # Searches below are scoped to the executionButler config section.
    search_opt = {"searchobj": config[".executionButler"], "curvals": {}, "default": None}
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # create gwjob
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        # Set job attributes based on the values find in the config excluding
        # the ones in the _ATTRS_MISC group. The attributes in this group are
        # somewhat "special":
        # * HTCondor plugin, which uses 'attrs' and 'profile', has its own
        #   mechanism for setting them,
        # * 'cmdvals' is being set internally, not via config.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL - _ATTRS_MISC:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # add as special final job
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # add as regular sink node
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.when_merge {when_merge}")

861 

862 

def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    # Disable all variable/env expansion while pulling raw command strings
    # out of the config; expansion is done explicitly below.
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["executionButler"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        # Script preamble: fail fast, echo commands, and bind the two
        # positional script arguments to env vars used by the commands.
        preamble = (
            "#!/bin/bash\n",
            "set -e",
            "set -x",
            "butlerConfig=$1",
            "executionButlerDir=$2",
        )
        for line in preamble:
            print(line, file=fh)

        # Emit numbered commands (command1, command2, ...) until one is
        # missing from the config.
        num = 1
        while True:
            found, command = config.search(f".executionButler.command{num}", opt=search_opt)
            if not found:
                break

            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            num += 1

    # Make the generated script executable.
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"

925 

926 

def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Collect current sink nodes (no outgoing edges) before inserting the
    # new job.
    sinks = [name for name in generic_workflow if not generic_workflow.out_degree(name)]
    _LOG.debug("gw_sinks = %s", sinks)

    # Make every former sink a parent of the final job so it runs last.
    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(sinks, final_job.name)