# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import copy
import dataclasses
import logging
import math
import os
import re

from lsst.utils.timer import time_this

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowJob,
    GenericWorkflowFile,
    GenericWorkflowExec,
)
from .bps_utils import (
    save_qg_subgraph,
    WhenToSaveQuantumGraphs,
    create_job_quantum_graph_filename,
    _create_execution_butler,
)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
    "request_memory_max",
})

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)
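
# Illustrative sanity check (not executed here; assumes every name in the
# category sets above is a valid GenericWorkflowJob attribute): the four sets
# partition _ATTRS_ALL, so each attribute is handled by exactly one rule, e.g.
#
#     assert _ATTRS_MAX | _ATTRS_SUM | _ATTRS_MISC | _ATTRS_UNIVERSAL == _ATTRS_ALL
#     assert not (_ATTRS_MAX & _ATTRS_SUM) and not (_ATTRS_MAX & _ATTRS_MISC)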


def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
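
# Minimal usage sketch (hypothetical paths; constructing the BpsConfig and
# ClusteredQuantumGraph is done elsewhere in ctrl_bps, e.g. by the submit
# driver):
#
#     config = BpsConfig("submit.yaml")
#     workflow, workflow_config = transform(config, cqgraph, "/path/to/submit")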


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have the task and file nodes
    # necessary for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize the workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the
    # entire quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Look up butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance the command line with env and file placeholders and gather
    command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allow whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change the qgraph variable to match whether using the run or a per-job
    # qgraph. Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete the command line.
    # (Be careful not to replace env variables as they may be different
    # in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow,
                                          gwjob.arguments, gwjob.cmdvals)


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in the job's command line string.

    Parameters
    ----------
    use_shared : `bool`
        Whether a shared filesystem is being used.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume full URI if in command line and told WMS is not
            # responsible for transferring file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share the file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge. Not future-proof.
                # Temporary fix until there is a job wrapper that pulls
                # files within the job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer.
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments
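
# Substitution order in _fill_arguments, sketched with hypothetical values:
# <FILE:...> placeholders are resolved first, then <ENV:...> placeholders
# become $VAR and are expanded with submit-side values, and finally any
# remaining {key} fields are filled from cmdvals. For example,
#
#     "<FILE:butlerConfig> --user <ENV:USER> {qgraphId}"
#
# could end up as
#
#     "/submit/run/butler.yaml --user jdoe 1234"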


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When the execution butler is created; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the QuantumGraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        At which submission stage to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which the QuantumGraph file is being determined.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location to be used by the job.
    """
    qgraph_gwfile = None
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the BPS config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching the config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        found, value = config.search(attr, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = None

    # If automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the
    # number of retries when necessary. If the memory multiplier is invalid,
    # disable automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
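
# The memory-scaling rule above, illustrated with hypothetical inputs:
#
#     {"memory_multiplier": 2.0, "number_of_retries": None}
#         -> "number_of_retries" is set to DEFAULT_MEM_RETRIES
#     {"memory_multiplier": 0.5, "number_of_retries": None}
#         -> "memory_multiplier" is reset to None (scaling disabled)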


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                           "Current cluster value: %s\n"
                           "Quantum value: %s",
                           attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                           current_value, quantum_value)
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if
                # memory autoscaling is enabled. If it is, always use the
                # memory multiplier and the number of retries which come
                # with the quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether memory autoscaling
                # will be enabled (or disabled) depending on the value of
                # its memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]

556 

557def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM): 

558 """Handle job attributes that are the sum of their values in the cluster. 

559 

560 Parameters 

561 ---------- 

562 quantum_job_values : `dict` [`str`, `Any`] 

563 Job values for running single Quantum. 

564 gwjob : `lsst.ctrl.bps.GenericWorkflowJob` 

565 Generic workflow job in which to store the aggregate values. 

566 attributes: `Iterable` [`str`], optional 

567 Job attributes to be set in the job following different rules. 

568 The default value is _ATTRS_SUM. 

569 """ 

570 for attr in _ATTRS_SUM & set(attributes): 

571 current_value = getattr(gwjob, attr) 

572 if not current_value: 

573 setattr(gwjob, attr, quantum_job_values[attr]) 

574 else: 

575 setattr(gwjob, attr, current_value + quantum_job_values[attr]) 
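
# Aggregation sketch (hypothetical numbers): for a cluster of two quanta
# with request_memory values 2048 and 4096 and request_walltime values 600
# and 300, repeated _handle_job_values calls leave the job with
# request_memory=4096 (maximum rule) and request_walltime=900 (sum rule),
# while _ATTRS_UNIVERSAL attributes must agree across the quanta or a
# RuntimeError is raised.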


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has the information needed for a WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files inside the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Look up butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save the full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from the cluster or the cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}

        # Cache job values for this cluster label if not already done.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allow whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in the
                # cluster section.
                cached_job_values[cluster.label].update(_get_job_values(config["cluster"][cluster.label],
                                                                        search_opt, "runQuantumCommand"))
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values['name'] = cluster.name
        cluster_job_values['label'] = cluster.label
        cluster_job_values['quanta_counts'] = cluster.quanta_counts
        cluster_job_values['tags'] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # Whether to continue searching for a value is determined by whether
        # the current value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info either common or aggregate for all Quanta in the cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt['curvals'] = {"curr_pipetask": qnode.taskDef.label}
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(config, search_opt,
                                                                              "runQuantumCommand")

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Determine the QuantumGraph and butler file objects for the job.
        qgraph_gwfile = _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob,
                                           generic_workflow.get_file("runQgraphFile"), prefix)
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write them now while the full graph is in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
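
# Shape of the resulting workflow (illustrative summary): an optional
# pipetaskInit source job (when runInit is set), one job per cluster wired
# with the cluster graph's dependencies, and an optional mergeExecutionButler
# job attached either as the WMS "final" job or as the single sink node,
# depending on whenMerge (see add_final_job below).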


def create_generic_workflow_config(config, prefix):
    """Create a generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add the final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create the script and add its command line to the job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from the command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as a special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as a regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False,
                  'searchobj': config['executionButler']}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
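
# For illustration only, with hypothetical .executionButler.commandN entries
# in the config, the generated final_job.bash could look like:
#
#     #!/bin/bash
#
#     set -e
#     set -x
#     butlerConfig=$1
#     executionButlerDir=$2
#     butler transfer-datasets ${executionButlerDir} ${butlerConfig}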


def add_final_job_as_sink(generic_workflow, final_job):
    """Add the final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as the new sink node depending upon all previous sink
        nodes.
    """
    # Find sink nodes of the generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)