# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow."""

24 

25import copy 

26import dataclasses 

27import logging 

28import math 

29import os 

30import re 

31 

32from lsst.utils.logging import VERBOSE 

33from lsst.utils.timer import time_this, timeMethod 

34 

35from . import ( 

36 DEFAULT_MEM_RETRIES, 

37 BpsConfig, 

38 GenericWorkflow, 

39 GenericWorkflowExec, 

40 GenericWorkflowFile, 

41 GenericWorkflowJob, 

42) 

43from .bps_utils import ( 

44 WhenToSaveQuantumGraphs, 

45 _create_execution_butler, 

46 create_job_quantum_graph_filename, 

47 save_qg_subgraph, 

48) 

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset(
    {
        "memory_multiplier",
        "number_of_retries",
        "request_cpus",
        "request_memory",
        "request_memory_max",
    }
)

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset(
    {
        "request_disk",
        "request_walltime",
    }
)

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset(
    {
        "label",  # taskDef labels aren't the same within a job and may not match the job label
        "cmdvals",
        "profile",
        "attrs",
    }
)

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))
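
# Worked example of the categories above (values illustrative): clustering two
# quanta with request_memory 2048 and 4096 yields a cluster job with
# request_memory=4096 (_ATTRS_MAX keeps the maximum); request_walltime 600 and
# 600 yields 1200 (_ATTRS_SUM adds); a universal attribute such as
# "executable" must be identical across the quanta or clustering fails with a
# RuntimeError (see _handle_job_values_universal below).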

_LOG = logging.getLogger(__name__)


@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
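
# A minimal usage sketch (illustrative; assumes `config` and `cqgraph` were
# produced by the earlier acquire/cluster stages of bps submit):
#
#     generic_workflow, generic_workflow_config = transform(config, cqgraph, submit_path)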


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to the workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    job in the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have the task and file nodes necessary
    # for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize the workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create the job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit", label="pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Look up butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance the command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allow whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change the qgraph variable to match whether using the run or a per-job
    # qgraph. Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save a dict of the other values needed to complete the command line.
    # (Be careful not to replace env variables as they may be different in
    # the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(
            cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow, gwjob.arguments, gwjob.cmdvals
        )
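
# Sketch of the placeholder lifecycle handled above (command text
# illustrative): a config command line such as
#     "pipetask run -b {butlerConfig} --qgraph {qgraphFile} ..."
# leaves _enhance_command as
#     "pipetask run -b <FILE:butlerConfig> --qgraph <FILE:qgraphFile_job1> ..."
# with any remaining {var} placeholders recorded in gwjob.cmdvals. When
# useLazyCommands is False, _fill_arguments (below) resolves the markers
# immediately using submit-side values.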


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in a job's command line string.

    Parameters
    ----------
    use_shared : `bool`
        Whether a shared filesystem is being used.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume a full URI if in the command line and told the WMS
            # is not responsible for transferring the file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share the file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge. Not future-proof.
                # Temporary fix until there is a job wrapper that pulls files
                # within the job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer.
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments
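
# The env-placeholder rewrite above, as a standalone sketch:
#
#     >>> re.sub(r"<ENV:([^>]+)>", r"$\1", "--log-dir <ENV:LOGDIR>/run1")
#     '--log-dir $LOGDIR/run1'
#
# os.path.expandvars then substitutes the submit-side value of $LOGDIR;
# variables that are unset are left as-is.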


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get the butler location to be used by a job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When the execution butler is created; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of the execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile(
        "butlerConfig",
        src_uri=butler_config,
        wms_transfer=wms_transfer,
        job_access_remote=job_access_remote,
        job_shared=job_shared,
    )

    return gwfile


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get the qgraph location to be used by a job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        At which submission stage to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which the QuantumGraph file is being determined.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph file location (may not include
        filename).
    """
    qgraph_gwfile = None
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(
            f"qgraphFile_{gwjob.name}",
            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching the config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    _LOG.debug("cmd_line_key=%s, search_opt=%s", cmd_line_key, search_opt)

    # Create a dummy job to easily access the default values.
    default_gwjob = GenericWorkflowJob("default_job")

    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
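        # For example, a standalone sketch of the line above:
        #     re.sub(r"_(\S)", lambda m: m.group(1).upper(), "request_memory_max")
        # returns "requestMemoryMax", which is the key looked up in the config.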

        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = getattr(default_gwjob, attr)

    # If automatic memory scaling is enabled (i.e., the memory multiplier is
    # set and it is a positive number greater than 1.0), adjust the number of
    # retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None
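
    # E.g. (values illustrative): memoryMultiplier=2.0 with numberOfRetries
    # unset yields number_of_retries=DEFAULT_MEM_RETRIES, while
    # memoryMultiplier=0.5 rounds up to 1 and therefore disables scaling
    # (memory_multiplier=None).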

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug(
            "Handling job %s (job=%s, quantum=%s)",
            attr,
            getattr(gwjob, attr),
            quantum_job_values.get(attr, "MISSING"),
        )
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error(
                    "Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                    "Current cluster value: %s\n"
                    "Quantum value: %s",
                    attr,
                    gwjob.name,
                    quantum_job_values.get("qgraphNodeId", "MISSING"),
                    current_value,
                    quantum_value,
                )
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which come with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]


def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has the information needed for a WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False, "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Look up butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save the full run QuantumGraph for use by jobs.
    generic_workflow.add_file(
        GenericWorkflowFile(
            "runQgraphFile",
            src_uri=config["runQgraphFile"],
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    )

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug(
            "cqgraph: name=%s, len=%s, label=%s, ids=%s",
            cluster.name,
            len(cluster.qgraph_node_ids),
            cluster.label,
            cluster.qgraph_node_ids,
        )

        gwjob = GenericWorkflowJob(cluster.name, label=cluster.label)

        # First get job values from the cluster or the cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}
        found, value = config.search("computeSite", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_site"] = value
        found, value = config.search("computeCloud", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_cloud"] = value

        # If some config values are set for this cluster.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allow whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here, so only look in the
                # cluster section.
                cached_job_values[cluster.label].update(
                    _get_job_values(config["cluster"][cluster.label], search_opt, "runQuantumCommand")
                )
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # An attribute counts as unset (and thus worth continuing to search
        # for) if its current value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at the cluster level, attempt to get job
        # info either common or aggregate for all Quanta in the cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"]["curr_pipetask"] = qnode.taskDef.label
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(
                    config, search_opt, "runQuantumCommand"
                )

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update the job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(
            config, save_qgraph_per_job, gwjob, generic_workflow.get_file("runQgraphFile"), prefix
        )
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(
            sorted([f"{node_id}" for node_id in cluster.qgraph_node_ids])
        )
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write them now while the graph is in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add the initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update(
        {
            "bps_isjob": "True",
            "bps_project": config["project"],
            "bps_campaign": config["campaign"],
            "bps_run": generic_workflow.name,
            "bps_operator": config["operator"],
            "bps_payload": config["payloadName"],
            "bps_runsite": config["computeSite"],
        }
    )

    # Add the final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add the final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output the final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "curvals": {}, "default": None}
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler", label="mergeExecutionButler")

        # Set job attributes based on the values found in the config,
        # excluding the ones in the _ATTRS_MISC group. The attributes in this
        # group are somewhat "special":
        # * the HTCondor plugin, which uses 'attrs' and 'profile', has its own
        #   mechanism for setting them,
        # * 'cmdvals' is set internally, not via config.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL - _ATTRS_MISC:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create the script and add the command line to the job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from the command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put the transfer repo job in the appropriate location in the
        # workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output the final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["executionButler"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w", encoding="utf8") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be arguments to the
            # script and set as env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in the command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
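
# For illustration, given a hypothetical config entry
#     executionButler:
#         command1: "${DAF_BUTLER_DIR}/bin/butler transfer-datasets {executionButlerDir} {butlerConfig}"
# the generated final_job.bash would look roughly like:
#
#     #!/bin/bash
#
#     set -e
#     set -x
#     butlerConfig=$1
#     executionButlerDir=$2
#     ${DAF_BUTLER_DIR}/bin/butler transfer-datasets ${executionButlerDir} ${butlerConfig}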


def add_final_job_as_sink(generic_workflow, final_job):
    """Add the final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Find sink nodes of generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)