Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a 

23generic workflow. 

24""" 

25 

26import logging 

27import os 

28import re 

29 

30from .bps_config import BpsConfig 

31from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile 

32from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename 

33 

34 

35_LOG = logging.getLogger(__name__) 

36 

37 

def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Prefer a name stored on the clustered graph; otherwise fall back to
    # the (required) unique process name from the config.
    name = None
    if "name" in clustered_quantum_graph.graph:
        name = clustered_quantum_graph.graph["name"]
    if name is None:
        _, name = config.search("uniqProcName", opt={"required": True})

    workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    workflow_config = create_generic_workflow_config(config, prefix)
    return workflow, workflow_config

66 

67 

def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    section = f".site.{job.compute_site}.profile.condor"
    if section not in config:
        return

    # Entries prefixed with "+" become job attributes (prefix stripped);
    # everything else goes into the job profile.
    for name, value in config[section].items():
        if name.startswith("+"):
            job.attrs[name[1:]] = value
        else:
            job.profile[name] = value

86 

87 

def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow holding the task and file nodes needed to
    # initialize the pipeline execution.
    init_workflow = create_init_workflow(config, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Source nodes of the main workflow (no incoming edges).
    sources = [node for node in generic_workflow if generic_workflow.in_degree(node) == 0]
    _LOG.debug("workflow sources = %s", sources)

    # Sink nodes of the init-only workflow (no outgoing edges).
    sinks = [node for node in init_workflow if init_workflow.out_degree(node) == 0]
    _LOG.debug("init sinks = %s", sinks)

    # Merge the init workflow into the main graph, then wire every init
    # sink ahead of every original source.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    for gwfile in init_workflow.get_files(data=True):
        generic_workflow.add_file(gwfile)
    for source in sources:
        for sink in sinks:
            generic_workflow.add_edge(sink, source)

122 

123 

def create_init_workflow(config, run_qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    run_qgraph_gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
        File object for the run QuantumGraph.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": False}

    init_workflow = GenericWorkflow("init")

    # Single job that executes the pipeline with --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"
    gwjob.compute_site = config["computeSite"]
    search_opt["default"] = 0
    for attr, config_key in (("request_cpus", "requestCpus"),
                             ("request_memory", "requestMemory"),
                             ("request_disk", "requestDisk"),
                             ("request_walltime", "requestWalltime")):
        _, value = config.search(config_key, opt=search_opt)
        setattr(gwjob, attr, int(value))
    update_job(config, gwjob)
    init_workflow.add_job(gwjob)

    # All outputs (config, software versions, etc) go to Butler, so only
    # the run QuantumGraph input needs to be attached to the job.
    init_workflow.add_job_inputs(gwjob.name, run_qgraph_gwfile)
    create_command(config, init_workflow, gwjob)

    return init_workflow

163 

164 

def create_command(config, workflow, gwjob):
    """Create command line and vals.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Generic workflow job to which the command line and vals should
        be saved.
    """
    options = {"curvals": {"curr_pipetask": gwjob.label},
               "replaceVars": False,
               "expandEnvVars": False,
               "replaceEnvVars": True,
               "required": False}

    # Pull the command line template out of the config.
    _, gwjob.cmdline = config.search("runQuantumCommand", opt=options)

    # Point the qgraph variable at the run-wide or per-job QuantumGraph
    # file depending on when (if ever) per-job graphs are saved.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.cmdline = gwjob.cmdline.replace("{qgraphFile}", "{runQgraphFile}")
    else:
        gwjob.cmdline = gwjob.cmdline.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Swap file placeholders for the special <FILE:...> markers that are
    # resolved to paths later (see _fill_command).
    for get_files in (workflow.get_job_inputs, workflow.get_job_outputs):
        for gwfile in get_files(gwjob.name):
            gwjob.cmdline = gwjob.cmdline.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete cmdline.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    options["replaceVars"] = True

    if gwjob.cmdvals is None:
        gwjob.cmdvals = {}
    for placeholder in re.findall(r"{([^}]+)}", gwjob.cmdline):
        if placeholder not in gwjob.cmdvals:
            _, gwjob.cmdvals[placeholder] = config.search(placeholder, opt=options)

    # backwards compatibility
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        _fill_command(config, workflow, gwjob)

215 

216 

217def _fill_command(config, workflow, gwjob): 

218 """Replace placeholders in command line string in job. 

219 

220 Parameters 

221 ---------- 

222 config : `~lsst.ctrl.bps.bps_config.BPSConfig` 

223 Bps configuration. 

224 workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow` 

225 Generic workflow containing the job. 

226 gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob` 

227 Job for which to update command line by filling in values. 

228 """ 

229 _, use_shared = config.search("useBpsShared", opt={"default": False}) 

230 # Replace input file placeholders with paths 

231 for gwfile in workflow.get_job_inputs(gwjob.name): 

232 if use_shared: 

233 uri = os.path.basename(gwfile.src_uri) 

234 else: 

235 uri = gwfile.src_uri 

236 gwjob.cmdline = gwjob.cmdline.replace(f"<FILE:{gwfile.name}>", uri) 

237 

238 # Replace output file placeholders with paths 

239 for gwfile in workflow.get_job_outputs(gwjob.name): 

240 if use_shared: 

241 uri = os.path.basename(gwfile.src_uri) 

242 else: 

243 uri = gwfile.src_uri 

244 gwjob.cmdline = gwjob.cmdline.replace(f"<FILE:{gwfile.name}>", uri) 

245 

246 gwjob.cmdline = gwjob.cmdline.format(**gwjob.cmdvals) 

247 

248 

def create_job_values_universal(config, qnodes, generic_workflow, gwjob, prefix):
    """Create job values. Must be same value for every PipelineTask in
    cluster.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    qnodes : `list` [`~lsst.pipe.base.QuantumGraph`]
        Full run QuantumGraph.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow containing job.
    gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Generic workflow job to which values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        If the run command or compute site differs between quanta
        within the cluster.
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        per_job_qgraph_file = False

    # Verify workflow config values are same for all nodes in QuantumGraph
    # for running the Quantum and compute_site.
    job_command = None
    job_compute_site = None
    for qnode in qnodes:
        _LOG.debug("taskClass=%s", qnode.taskDef.taskClass)
        _LOG.debug("taskName=%s", qnode.taskDef.taskName)
        _LOG.debug("label=%s", qnode.taskDef.label)

        search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label}, "required": False}

        _, command = config.search("runQuantumCommand", opt=search_opt)
        if job_command is None:
            job_command = command
        elif job_command != command:
            _LOG.error("Inconsistent command to run QuantumGraph\n"
                       "Cluster %s Quantum Number %d\n"
                       "Current cluster command: %s\n"
                       "Inconsistent command: %s",
                       gwjob.name, qnode.nodeId.number, job_command, command)
            raise RuntimeError("Inconsistent run QuantumGraph command")

        _, compute_site = config.search("computeSite", opt=search_opt)
        if job_compute_site is None:
            job_compute_site = compute_site
        elif job_compute_site != compute_site:
            _LOG.error("Inconsistent compute_site\n"
                       "Cluster %s Quantum Number %d\n"
                       "Current cluster compute_site: %s\n"
                       "Inconsistent compute_site: %s",
                       gwjob.name, qnode.nodeId.number, job_compute_site, compute_site)
            # Bug fix: previously raised with the command-mismatch message
            # ("Inconsistent run QuantumGraph command"), hiding the real
            # cause of the failure.
            raise RuntimeError("Inconsistent compute_site")

    if per_job_qgraph_file:
        # Each job reads its own QuantumGraph subgraph file.
        gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                     src_uri=create_job_quantum_graph_filename(gwjob, prefix),
                                     wms_transfer=True,
                                     job_access_remote=True,
                                     job_shared=True)
    else:
        # All jobs share the full run QuantumGraph; node ids select the
        # quanta belonging to this job.
        gwfile = generic_workflow.get_file("runQgraphFile")
        gwjob.cmdvals = {"qgraphNodeId": ",".join(sorted([f"{qnode.nodeId.number}" for qnode in qnodes])),
                         "qgraphId": qnodes[0].nodeId.buildId}

    generic_workflow.add_job_inputs(gwjob.name, gwfile)

    gwjob.cmdline = job_command
    create_command(config, generic_workflow, gwjob)
    if job_compute_site is not None:
        gwjob.compute_site = job_compute_site
    update_job(config, gwjob)

322 

323 

def create_job_values_aggregate(config, qnodes, gwjob, pipetask_labels):
    """Create job values that are aggregate of values from PipelineTasks
    in QuantumGraph.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    qnodes : `list` [`~lsst.pipe.base.QuantumGraph`]
        Full run QuantumGraph.
    gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job in which to store the aggregate values.
    pipetask_labels : `list` [`str`]
        PipelineTask labels used in generating quanta summary tag.
    """
    label_counts = dict.fromkeys(pipetask_labels, 0)

    # cpus/memory aggregate as the max across quanta; disk/walltime sum.
    gwjob.request_cpus = 0
    gwjob.request_memory = 0
    gwjob.request_disk = 0
    gwjob.request_walltime = 0

    for qnode in qnodes:
        label = qnode.taskDef.label
        label_counts[label] += 1

        opts = {"curvals": {"curr_pipetask": label}, "required": False, "default": 0}
        _, cpus = config.search("requestCpus", opt=opts)
        gwjob.request_cpus = max(gwjob.request_cpus, int(cpus))
        _, memory = config.search("requestMemory", opt=opts)
        gwjob.request_memory = max(gwjob.request_memory, int(memory))
        _, disk = config.search("requestDisk", opt=opts)
        gwjob.request_disk += int(disk)
        _, walltime = config.search("requestWalltime", opt=opts)
        gwjob.request_walltime += int(walltime)

    # Summary tag lists only the labels that actually appear in this job.
    gwjob.tags["quanta_summary"] = ";".join(f"{label}:{count}"
                                            for label, count in label_counts.items() if count)

360 

361 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["run_qgraph_file"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        generic_workflow.add_job(gwjob)

        qgraph = clustered_quanta_graph.graph["qgraph"]
        qnodes = [qgraph.getQuantumNodeByNodeId(node_id) for node_id in data["qgraph_node_ids"]]
        pipetask_labels = [task.label for task in qgraph.iterTaskGraph()]
        create_job_values_universal(config, qnodes, generic_workflow, gwjob, prefix)
        create_job_values_aggregate(config, qnodes, gwjob, pipetask_labels)

        if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
            # Bug fix: the per-job QuantumGraph file is registered under the
            # name "qgraphFile_<job>" by create_job_values_universal, but
            # this lookup previously used "qgraph_<job>", which could never
            # match.
            save_qg_subgraph(qgraph, generic_workflow.get_file(f"qgraphFile_{gwjob.name}").src_uri,
                             data["qgraph_node_ids"])

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    # NOTE(review): the default passed here is the literal string
    # "{default: False}", which is truthy if returned as-is — confirm
    # BpsConfig.get interprets it as an option spec rather than a value.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)

    return generic_workflow

427 

428 

def add_workflow_attributes(config, generic_workflow):
    """Add workflow-level attributes to given GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Tally quanta counts per pipetask label across every job's
    # "quanta_summary" tag (format "label:count;label:count;...").
    run_quanta_counts = {}
    for job_name in generic_workflow:
        job = generic_workflow.get_job(job_name)
        if job.tags is not None and "quanta_summary" in job.tags:
            for part in job.tags["quanta_summary"].split(";"):
                label, cnt = part.split(":")
                run_quanta_counts[label] = run_quanta_counts.get(label, 0) + int(cnt)

    run_summary = ";".join(f"{label}:{count}" for label, count in run_quanta_counts.items())

    generic_workflow.run_attrs.update({"bps_run_summary": run_summary,
                                       "bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": "TODO"})

462 

463 

def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Copy the incoming config and record the workflow identity/location.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config

482 return generic_workflow_config