Coverage for python/lsst/ctrl/bps/transform.py: 8% (313 statements)


# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import logging
import math
import os
import re
import dataclasses
import copy

from lsst.daf.butler.core.utils import time_this

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowJob,
    GenericWorkflowFile,
    GenericWorkflowExec,
)
from .bps_utils import (
    save_qg_subgraph,
    WhenToSaveQuantumGraphs,
    create_job_quantum_graph_filename,
    _create_execution_butler,
)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
})

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)
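# A minimal usage sketch (illustrative; the config filename and submit path
# below are hypothetical, and the snippet is left as a comment since this is
# a library module). It assumes a populated BpsConfig and the
# ClusteredQuantumGraph produced by the earlier clustering stage:
#
#     config = BpsConfig("bps_submit.yaml")
#     workflow, wf_config = transform(config, cqgraph, "/path/to/submit")
#     print(workflow.name)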

def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    job of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have the task and file nodes necessary
    # for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create a workflow for running the initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of the job(s) needed to initialize the
        workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create the job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))
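    # Note the node ids are sorted as strings, so, e.g. (illustrative),
    # nodes 2, 7, and 12 come out as "12,2,7".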

    init_workflow.add_job(gwjob)

    # Look up butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance the command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allow whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change the qgraph variable to match whether using the run or a per-job
    # qgraph.  Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")
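        # e.g. (illustrative): for a job named "cluster42" this rewrites
        # "{qgraphFile}" to "{qgraphFile_cluster42}", matching the file key
        # created in _get_qgraph_gwfile.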

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save a dict of the other values needed to complete the command line.
    # (Be careful not to replace env variables as they may be different in
    # the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow,
                                          gwjob.arguments, gwjob.cmdvals)


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in a job's command line string.

    Parameters
    ----------
    use_shared : `bool`
        Whether a shared filesystem is being used.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
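    # A worked illustration (values hypothetical): with use_shared=True, a
    # shareable input file "butlerConfig" whose src_uri is /repo/butler.yaml,
    # and cmdvals={"qgraphId": "abc123"}, the string
    #     "<FILE:butlerConfig> --user <ENV:USER> {qgraphId}"
    # becomes "/repo/butler.yaml --user jdoe abc123" for a submit-side
    # $USER of jdoe.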

    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume a full URI if in the command line and told the WMS
            # is not responsible for transferring the file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share the file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge.  Not future-proof.
                # Temporary fix until there is a job wrapper that pulls files
                # within the job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer.
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When the execution butler is created; used to determine whether the
        job is using an execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the qgraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        The submission stage at which to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which the QuantumGraph file is being determined.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include a
        filename).
    """
    qgraph_gwfile = None
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching the config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
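        # e.g. (illustrative): "request_memory" -> "requestMemory",
        # "memory_multiplier" -> "memoryMultiplier".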

        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = None

    # If the automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the number
    # of retries when necessary.  If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                           "Current cluster value: %s\n"
                           "Quantum value: %s",
                           attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                           current_value, quantum_value)
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled.  If it is, always use the memory
                # multiplier and the number of retries which come with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]
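                # For example (illustrative): with quanta requesting 2048 MB
                # and 4096 MB, the cluster job ends up requesting 4096 MB and
                # adopts the memory multiplier and retry count that came with
                # the 4096 MB quantum.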

def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has the information needed for a WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Look up butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save the full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from the cluster or the cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}

        # If some config values are set for this cluster.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allow whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in
                # the cluster section.
                cached_job_values[cluster.label].update(_get_job_values(config["cluster"][cluster.label],
                                                                        search_opt, "runQuantumCommand"))
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of deciding whether to continue searching for a value,
        # an attribute counts as unset if its value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info either common or aggregate for all Quanta in the cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"] = {"curr_pipetask": qnode.taskDef.label}
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(config, search_opt,
                                                                              "runQuantumCommand")

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update the job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob,
                                           generic_workflow.get_file("runQgraphFile"), prefix)
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write them now while the full graph is in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add the initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add the final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow

def create_generic_workflow_config(config, prefix):
    """Create the generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add the final workflow job depending upon the configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create the script and add the command line to the job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from the command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as a special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as a regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")

def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {"replaceVars": False, "replaceEnvVars": False, "expandEnvVars": False,
                  "searchobj": config["executionButler"]}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)
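            # Net effect (illustrative): "${LSST_VERSION} {butlerConfig}" is
            # protected as "<BPSTMP:LSST_VERSION> <BPSTMP:butlerConfig>"
            # during formatting and restored here as
            # "${LSST_VERSION} ${butlerConfig}".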

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"


def add_final_job_as_sink(generic_workflow, final_job):
    """Add the final job as the single sink node for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as the new sink node, depending upon all previous sink
        nodes.
    """
    # Find the sink nodes of the generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)