Coverage for python/lsst/ctrl/bps/transform.py: 8% (315 statements)

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import copy
import dataclasses
import logging
import math
import os
import re

from lsst.utils.logging import VERBOSE
from lsst.utils.timer import time_this, timeMethod

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowJob,
    GenericWorkflowFile,
    GenericWorkflowExec,
)
from .bps_utils import (
    save_qg_subgraph,
    WhenToSaveQuantumGraphs,
    create_job_quantum_graph_filename,
    _create_execution_butler,
)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
    "request_memory_max",
})

# Job attributes that need to be set to the sum of their values in the
# cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)


@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
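

# A minimal usage sketch (hypothetical paths; "cqgraph" would come from an
# earlier clustering stage, and in practice this driver is invoked by the bps
# command line rather than called directly):
#
#     config = BpsConfig("submit.yaml")
#     generic_workflow, gw_config = transform(config, cqgraph, "/path/to/submit")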


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the jobs in the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have task and file nodes necessary
    # for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted(f"{node_id}" for node_id in node_ids))

    init_workflow.add_job(gwjob)

    # Lookup butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change qgraph variable to match whether using run or per-job qgraph.
    # Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful not to replace env variables as they may
    # be different in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(cached_job_values[gwjob.label]["bpsUseShared"],
                                          generic_workflow, gwjob.arguments, gwjob.cmdvals)

283def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals): 

284 """Replace placeholders in command line string in job. 

285 

286 Parameters 

287 ---------- 

288 use_shared : `bool` 

289 Whether using shared filesystem. 

290 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

291 Generic workflow containing the job. 

292 arguments : `str` 

293 String containing placeholders. 

294 cmdvals : `dict` [`str`, `Any`] 

295 Any command line values that can be used to replace placeholders. 

296 

297 Returns 

298 ------- 

299 arguments : `str` 

300 Command line with FILE and ENV placeholders replaced. 

301 """ 

302 # Replace file placeholders 

303 for file_key in re.findall(r"<FILE:([^>]+)>", arguments): 

304 gwfile = generic_workflow.get_file(file_key) 

305 if not gwfile.wms_transfer: 

306 # Must assume full URI if in command line and told WMS is not 

307 # responsible for transferring file. 

308 uri = gwfile.src_uri 

309 elif use_shared: 

310 if gwfile.job_shared: 

311 # Have shared filesystems and jobs can share file. 

312 uri = gwfile.src_uri 

313 else: 

314 # Taking advantage of inside knowledge. Not future-proof. 

315 # Temporary fix until have job wrapper that pulls files 

316 # within job. 

317 if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml": 

318 uri = "butler.yaml" 

319 else: 

320 uri = os.path.basename(gwfile.src_uri) 

321 else: # Using push transfer 

322 uri = os.path.basename(gwfile.src_uri) 

323 

324 arguments = arguments.replace(f"<FILE:{file_key}>", uri) 

325 

326 # Replace env placeholder with submit-side values 

327 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments) 

328 arguments = os.path.expandvars(arguments) 

329 

330 # Replace remaining vars 

331 arguments = arguments.format(**cmdvals) 

332 

333 return arguments 
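

# A minimal, self-contained sketch (hypothetical command line and values; not
# part of the original module) of the placeholder grammar consumed above:
# <FILE:x> becomes a per-file URI, <ENV:x> becomes a shell-style $x expanded
# with submit-side values, and remaining {key} tokens are filled from cmdvals.
def _demo_placeholder_grammar():
    arguments = "pipetask run -b <FILE:butlerConfig> -i <ENV:IN_COLL> -j {numProc}"
    # Pretend the WMS transfers butlerConfig into the job working directory,
    # so only a basename survives.
    arguments = arguments.replace("<FILE:butlerConfig>", "butler.yaml")
    # Env placeholders become $VAR, then get expanded on the submit side.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    os.environ.setdefault("IN_COLL", "HSC/defaults")
    arguments = os.path.expandvars(arguments)
    # Remaining {key} tokens are filled from the job's cmdvals.
    return arguments.format(numProc=4)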


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When to create the execution butler; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the QuantumGraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        At which submission stage to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which the QuantumGraph file is being determined.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include
        filename).
    """
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob,
                                                                                      prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the BPS config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = None

    # If the automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
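

# A small runnable sketch (using attribute names from _ATTRS_MAX above) of the
# snake_case-to-camelCase mapping used when looking up job values in the
# config yaml:
def _demo_yaml_names():
    def to_yaml_name(attr):
        return re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)

    assert to_yaml_name("request_memory") == "requestMemory"
    assert to_yaml_name("number_of_retries") == "numberOfRetries"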


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                           "Current cluster value: %s\n"
                           "Quantum value: %s",
                           attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                           current_value, quantum_value)
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")
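

# A short sketch (hypothetical label values; the GenericWorkflowJob
# construction mirrors its usage elsewhere in this module and assumes
# attributes default to falsy values) of the universal rule: the first quantum
# seeds an attribute, and a later quantum with a different value raises.
# Note the function also logs an error before raising.
def _demo_universal_conflict():
    gwjob = GenericWorkflowJob("demoCluster")
    _handle_job_values_universal({"label": "isr"}, gwjob)
    try:
        _handle_job_values_universal({"label": "calibrate"}, gwjob)
    except RuntimeError:
        pass  # inconsistent "label" within the cluster, as expected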


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if
                # memory autoscaling is enabled. If it is, always use the
                # memory multiplier and the number of retries which come
                # with the quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]


def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])
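

# A runnable sketch (hypothetical resource numbers; assumes the relevant
# GenericWorkflowJob attributes default to falsy values) of how the three
# rules combine when two quanta are folded into one cluster job:
# request_memory takes the maximum, request_walltime and request_disk the
# sums, and label must agree across quanta.
def _demo_cluster_aggregation():
    gwjob = GenericWorkflowJob("demoCluster")
    quanta = [
        {"label": "isr", "request_memory": 2048, "memory_multiplier": None,
         "request_walltime": 600, "request_disk": 1024},
        {"label": "isr", "request_memory": 4096, "memory_multiplier": None,
         "request_walltime": 900, "request_disk": 512},
    ]
    for quantum_job_values in quanta:
        _handle_job_values(quantum_job_values, gwjob)
    assert gwjob.request_memory == 4096  # max over quanta
    assert gwjob.request_walltime == 1500  # sum over quanta
    assert gwjob.request_disk == 1536  # sum over quanta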


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Lookup butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}

        # If some config values are set for this cluster.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allowing whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in
                # cluster section.
                cached_job_values[cluster.label].update(_get_job_values(config["cluster"][cluster.label],
                                                                        search_opt, "runQuantumCommand"))
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For the purpose of deciding whether to continue searching for a
        # value, an attribute is considered unset if its value evaluates
        # to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info either common or aggregate for all Quanta in the cluster.
        for node_id in cluster.qgraph_node_ids:
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"] = {"curr_pipetask": qnode.taskDef.label}
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(config, search_opt,
                                                                              "runQuantumCommand")

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob,
                                           generic_workflow.get_file("runQgraphFile"), prefix)
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted(f"{node_id}" for node_id in
                                                        cluster.qgraph_node_ids))
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write it now while the full graph is in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {"replaceVars": False, "replaceEnvVars": False, "expandEnvVars": False,
                  "searchobj": config["executionButler"]}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
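

# A small runnable sketch (hypothetical command text) of the ${VAR} protection
# round trip used when writing the final script above: env references are
# hidden as <BPSTMP:...> so ordinary {var} replacement can run, then restored.
def _demo_bpstmp_roundtrip():
    command = "butler transfer-datasets ${executionButlerDir} {butlerConfig}"
    # Hide ${...} env references from the formatter.
    protected = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)
    # The script supplies butlerConfig as an env var, too.
    protected = protected.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")
    # Restore all placeholders to ${...} for the shell.
    restored = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", protected)
    assert restored == "butler transfer-datasets ${executionButlerDir} ${butlerConfig}"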


def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Find sink nodes of generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)