Coverage for python/lsst/ctrl/bps/transform.py: 8%

313 statements  

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow."""

import copy
import dataclasses
import logging
import math
import os
import re

from lsst.utils.timer import time_this

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowExec,
    GenericWorkflowFile,
    GenericWorkflowJob,
)
from .bps_utils import (
    WhenToSaveQuantumGraphs,
    _create_execution_butler,
    create_job_quantum_graph_filename,
    save_qg_subgraph,
)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
    "request_memory_max",
})

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))
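
# Not part of the original module: a hypothetical sanity check illustrating
# how the category sets above carve up the job attributes. It assumes the
# names in the special sets are all real GenericWorkflowJob fields.
def _check_attr_partition():
    special = [_ATTRS_MAX, _ATTRS_SUM, _ATTRS_MISC]
    # The three special-handling sets are pairwise disjoint...
    assert all(not (a & b) for i, a in enumerate(special) for b in special[i + 1:])
    # ...and together with _ATTRS_UNIVERSAL they cover every field.
    assert _ATTRS_UNIVERSAL | _ATTRS_MAX | _ATTRS_SUM | _ATTRS_MISC == _ATTRS_ALL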


_LOG = logging.getLogger(__name__)


def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
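
# A minimal usage sketch (not part of the module): config and cqgraph come
# from earlier BPS submission stages, and "submit/u/me/run1" is a made-up
# prefix.
#
#     from lsst.ctrl.bps import BpsConfig
#     config = BpsConfig("bps_submit.yaml")
#     workflow, workflow_config = transform(config, cqgraph, "submit/u/me/run1")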


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    job of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have the task and file nodes necessary
    # for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create a workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize the workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Look up butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow
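
# Illustrative aside (not in the original module): the node-id list above is
# joined after a *string* sort, so ids are ordered lexicographically, not
# numerically. A standalone sketch with made-up numbers:
def _demo_node_id_join(numbers=(7, 3, 12)):
    return ",".join(sorted(f"{n}" for n in numbers))  # returns "12,3,7"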


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance the command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allow whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change the qgraph variable to match whether using the run or a per-job
    # qgraph. Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save a dict of the other values needed to complete the command line.
    # (Be careful not to replace env variables as they may be different in
    # the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow,
                                          gwjob.arguments, gwjob.cmdvals)
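
# Standalone sketch (not in the original module) of the file-placeholder
# rewrite performed above; the file names are made up.
def _demo_file_placeholders():
    args = "pipetask run -b {butlerConfig} -g {qgraphFile_job1}"
    for name in ("butlerConfig", "qgraphFile_job1"):
        args = args.replace(f"{{{name}}}", f"<FILE:{name}>")
    # All {vars} are now <FILE:...> tokens, so nothing is left for cmdvals.
    return re.findall(r"{([^}]+)}", args)  # returns []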


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in the job's command line string.

    Parameters
    ----------
    use_shared : `bool`
        Whether a shared filesystem is being used.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume a full URI if in the command line and told the WMS
            # is not responsible for transferring the file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share the file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge. Not future-proof.
                # Temporary fix until there is a job wrapper that pulls files
                # within the job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer.
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments
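
# A minimal sketch (not in the original module) of the ENV handling above,
# using only the stdlib; the command line and value are made up.
def _demo_fill_env_and_vars():
    args = "pipetask run -b <ENV:HOME>/butler.yaml -i {inCollection}"
    args = re.sub(r"<ENV:([^>]+)>", r"$\1", args)   # <ENV:HOME> -> $HOME
    args = os.path.expandvars(args)                 # $HOME -> submit-side value
    return args.format(inCollection="raw/all")      # fill remaining {vars}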


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When to create the execution butler; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile
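
# For example (paths are made up): with when_create="NEVER" the job points at
# the central repo with no WMS transfer, while any other value switches to the
# execution butler directory, made absolute under the submit prefix:
#
#     _get_butler_gwfile("/sub", "NEVER", "/repo/main", "EB").src_uri   # "/repo/main"
#     _get_butler_gwfile("/sub", "SUBMIT", "/repo/main", "EB").src_uri  # "/sub/EB"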


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the qgraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        Submission stage at which to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to determine the QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include a
        filename).
    """
    qgraph_gwfile = None
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile
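
# Illustrative note (not in the original module): the per-job file key is
# simply "qgraphFile_<job name>", which is what _enhance_command rewrites
# "{qgraphFile}" into before swapping it for a <FILE:...> token, e.g. (job
# name made up):
#
#     "{qgraphFile}" -> "{qgraphFile_cluster17}" -> "<FILE:qgraphFile_cluster17>"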


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching the config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = None

    # If automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
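
# The snake_case -> camelCase mapping above is a one-line regex; a standalone
# sketch (not in the original module) with a real attribute name:
def _demo_yaml_name(attr="request_memory_max"):
    return re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
    # returns "requestMemoryMax"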


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                           "Current cluster value: %s\n"
                           "Quantum value: %s",
                           attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                           current_value, quantum_value)
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating the memory requirements for a job, check if
                # memory autoscaling is enabled. If it is, always use the
                # memory multiplier and the number of retries which come with
                # the quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether memory autoscaling will
                # be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]


def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])
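
# A minimal sketch (not in the original module) of the max/sum aggregation
# rules, using a stand-in namespace instead of a real GenericWorkflowJob;
# the attribute names are real, the values are made up.
def _demo_aggregation():
    from types import SimpleNamespace
    job = SimpleNamespace(request_memory=None, memory_multiplier=None,
                          number_of_retries=None, request_walltime=0)
    for quantum in ({"request_memory": 2048, "memory_multiplier": None,
                     "number_of_retries": None, "request_walltime": 10},
                    {"request_memory": 4096, "memory_multiplier": None,
                     "number_of_retries": None, "request_walltime": 20}):
        _handle_job_values_max(quantum, job, {"request_memory"})
        _handle_job_values_sum(quantum, job, {"request_walltime"})
    # Memory takes the max, walltime accumulates.
    return job.request_memory, job.request_walltime  # (4096, 30)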


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has the information needed by a WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Look up butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save the full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from the cluster or the cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}

        # If some config values are set for this cluster.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allow whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in the
                # cluster section.
                cached_job_values[cluster.label].update(_get_job_values(config["cluster"][cluster.label],
                                                                        search_opt, "runQuantumCommand"))
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # An attribute counts as unset (and thus worth continuing to search
        # for) if its value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info that is either common to, or aggregated over, all Quanta in
        # the cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"] = {"curr_pipetask": qnode.taskDef.label}
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(config, search_opt,
                                                                              "runQuantumCommand")

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update the job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob,
                                           generic_workflow.get_file("runQgraphFile"), prefix)
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write it now while in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add the initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add the final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create the generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add the final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create the script and add the command line to the job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from the command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer-repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as a special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as a regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge {when_merge}")


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {"replaceVars": False, "replaceEnvVars": False, "expandEnvVars": False,
                  "searchobj": config["executionButler"]}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
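
# Sketch (not in the original module) of the <BPSTMP:...> round trip used
# above to shield env vars from the formatter; the command string is made up.
def _demo_bpstmp(command="butler transfer-datasets ${EXECUTION_BUTLER} repo.yaml"):
    shielded = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)
    # ... the formatter would run here without touching <BPSTMP:...> tokens ...
    return re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", shielded)  # env vars restored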


def add_final_job_as_sink(generic_workflow, final_job):
    """Add the final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as the new sink node depending upon all previous sink
        nodes.
    """
    # Find the sink nodes of the generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)
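
# Sink detection above relies on the workflow being a directed graph; a
# standalone sketch (not in the original module) assuming plain networkx,
# which GenericWorkflow builds on, with made-up edges.
def _demo_sinks():
    import networkx
    graph = networkx.DiGraph([("a", "b"), ("a", "c")])
    return [n for n in graph if graph.out_degree(n) == 0]  # returns ["b", "c"]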