# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import logging
import os
import re

from . import BpsConfig, GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile
from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename


_LOG = logging.getLogger(__name__)


def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
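
    Examples
    --------
    A minimal usage sketch; ``config``, ``cqg`` and ``submit_path`` are
    illustrative names for objects produced by earlier BPS stages::

        generic_workflow, generic_workflow_config = transform(config, cqg, submit_path)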

    """
    if "name" in clustered_quantum_graph.graph and clustered_quantum_graph.graph["name"] is not None:
        name = clustered_quantum_graph.graph["name"]
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config


def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    key = f".site.{job.compute_site}.profile.condor"

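    # Copy site-specific HTCondor settings onto the job: keys prefixed with
    # "+" become job attributes (with the "+" stripped) and all other keys
    # become profile entries.  An assumed config fragment, for illustration:
    #
    #     site:
    #       mysite:
    #         profile:
    #           condor:
    #             +some_attr: somevalue    # -> job.attrs["some_attr"]
    #             request_cpus: 4          # -> job.profile["request_cpus"]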

    if key in config:
        for key, val in config[key].items():
            if key.startswith("+"):
                job.attrs[key[1:]] = val
            else:
                job.profile[key] = val


def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow's jobs.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have task and file nodes necessary for
    # initializing the pipeline execution.
    init_workflow = create_init_workflow(config, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Find source nodes in workflow graph.
    workflow_sources = [n for n in generic_workflow if generic_workflow.in_degree(n) == 0]
    _LOG.debug("workflow sources = %s", workflow_sources)

    # Find sink nodes of initonly graph.
    init_sinks = [n for n in init_workflow if init_workflow.out_degree(n) == 0]
    _LOG.debug("init sinks = %s", init_sinks)

    # Add initonly nodes to Workflow graph and make new edges.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    for gwfile in init_workflow.get_files(data=True):
        generic_workflow.add_file(gwfile)
    for source in workflow_sources:
        for sink in init_sinks:
            generic_workflow.add_edge(sink, source)


def create_init_workflow(config, run_qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    run_qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the run QuantumGraph.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": False}

    init_workflow = GenericWorkflow("init")

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"
    gwjob.compute_site = config["computeSite"]
    search_opt["default"] = 0
    gwjob.request_cpus = int(config.search("requestCpus", opt=search_opt)[1])
    gwjob.request_memory = int(config.search("requestMemory", opt=search_opt)[1])
    gwjob.request_disk = int(config.search("requestDisk", opt=search_opt)[1])
    gwjob.request_walltime = int(config.search("requestWalltime", opt=search_opt)[1])
    update_job(config, gwjob)
    init_workflow.add_job(gwjob)

    # All outputs (config, software versions, etc.) go to the Butler.
    # Currently no need to add them to the job.
    init_workflow.add_job_inputs(gwjob.name, run_qgraph_gwfile)
    create_command(config, init_workflow, gwjob)

    return init_workflow


def create_command(config, workflow, gwjob):
    """Create the command line and the values needed to complete it.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the command line and values should
        be saved.
    """
    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Get command line from config.
    _, gwjob.cmdline = config.search("runQuantumCommand", opt=search_opt)

    # Change qgraph variable to match whether using the run or per-job qgraph.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.cmdline = gwjob.cmdline.replace("{qgraphFile}", "{runQgraphFile}")
    else:
        gwjob.cmdline = gwjob.cmdline.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in workflow.get_job_inputs(gwjob.name):
        gwjob.cmdline = gwjob.cmdline.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in workflow.get_job_outputs(gwjob.name):
        gwjob.cmdline = gwjob.cmdline.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete cmdline.
    # (Be careful not to replace environment variables as they may
    # be different in the compute job.)
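    # Any "{...}" placeholder still present after the file substitution above,
    # e.g. a hypothetical {butlerConfig}, is looked up in the config and
    # stored in cmdvals for later formatting.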

    search_opt["replaceVars"] = True

    if gwjob.cmdvals is None:
        gwjob.cmdvals = {}
    for key in re.findall(r"{([^}]+)}", gwjob.cmdline):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # Backwards compatibility: fill in the command line now if lazy command
    # expansion is disabled.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        _fill_command(config, workflow, gwjob)


def _fill_command(config, workflow, gwjob):
    """Replace placeholders in the job's command line string.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to update command line by filling in values.
    """
    _, use_shared = config.search("useBpsShared", opt={"default": False})
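    # When "useBpsShared" is set, only file basenames are put on the command
    # line (the files are presumably reachable from the job's working
    # directory); otherwise the full source URIs are used.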

    # Replace input file placeholders with paths.
    for gwfile in workflow.get_job_inputs(gwjob.name):
        if use_shared:
            uri = os.path.basename(gwfile.src_uri)
        else:
            uri = gwfile.src_uri
        gwjob.cmdline = gwjob.cmdline.replace(f"<FILE:{gwfile.name}>", uri)

    # Replace output file placeholders with paths.
    for gwfile in workflow.get_job_outputs(gwjob.name):
        if use_shared:
            uri = os.path.basename(gwfile.src_uri)
        else:
            uri = gwfile.src_uri
        gwjob.cmdline = gwjob.cmdline.replace(f"<FILE:{gwfile.name}>", uri)

    gwjob.cmdline = gwjob.cmdline.format(**gwjob.cmdvals)


def create_job_values_universal(config, qnodes, generic_workflow, gwjob, prefix):
    """Create job values that must be the same for every PipelineTask in the
    cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qnodes : `list` [`lsst.pipe.base.QuantumNode`]
        QuantumGraph nodes for the Quanta in the cluster.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which values will be added.
    prefix : `str`
        Root path for any output files.
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        per_job_qgraph_file = False

    # Verify workflow config values are same for all nodes in QuantumGraph
    # for running the Quantum and compute_site.
    job_command = None
    job_compute_site = None
    for qnode in qnodes:
        _LOG.debug("taskClass=%s", qnode.taskDef.taskClass)
        _LOG.debug("taskName=%s", qnode.taskDef.taskName)
        _LOG.debug("label=%s", qnode.taskDef.label)

        search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label}, "required": False}

        _, command = config.search("runQuantumCommand", opt=search_opt)
        if job_command is None:
            job_command = command
        elif job_command != command:
            _LOG.error("Inconsistent command to run QuantumGraph\n"
                       "Cluster %s Quantum Number %d\n"
                       "Current cluster command: %s\n"
                       "Inconsistent command: %s",
                       gwjob.name, qnode.nodeId.number, job_command, command)
            raise RuntimeError("Inconsistent run QuantumGraph command")

        _, compute_site = config.search("computeSite", opt=search_opt)
        if job_compute_site is None:
            job_compute_site = compute_site
        elif job_compute_site != compute_site:
            _LOG.error("Inconsistent compute_site\n"
                       "Cluster %s Quantum Number %d\n"
                       "Current cluster compute_site: %s\n"
                       "Inconsistent compute_site: %s",
                       gwjob.name, qnode.nodeId.number, job_compute_site, compute_site)
            raise RuntimeError("Inconsistent compute_site")

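    # Attach the QuantumGraph input for the job: either a per-job subgraph
    # file (to be written under the submit prefix) or, when per-job graph
    # files are disabled, the full run QuantumGraph with the relevant node
    # ids passed via cmdvals.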

    if per_job_qgraph_file:
        gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                     src_uri=create_job_quantum_graph_filename(gwjob, prefix),
                                     wms_transfer=True,
                                     job_access_remote=True,
                                     job_shared=True)
    else:
        gwfile = generic_workflow.get_file("runQgraphFile")
        gwjob.cmdvals = {"qgraphNodeId": ",".join(sorted([f"{qnode.nodeId.number}" for qnode in qnodes])),
                         "qgraphId": qnodes[0].nodeId.buildId}

    generic_workflow.add_job_inputs(gwjob.name, gwfile)

    gwjob.cmdline = job_command
    create_command(config, generic_workflow, gwjob)
    if job_compute_site is not None:
        gwjob.compute_site = job_compute_site
    update_job(config, gwjob)


def create_job_values_aggregate(config, qnodes, gwjob, pipetask_labels):
    """Create job values that are aggregates of values from the PipelineTasks
    in the QuantumGraph.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qnodes : `list` [`lsst.pipe.base.QuantumNode`]
        QuantumGraph nodes for the Quanta in the cluster.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job in which to store the aggregate values.
    pipetask_labels : `list` [`str`]
        PipelineTask labels used in generating quanta summary tag.
    """
    label_counts = dict.fromkeys(pipetask_labels, 0)

    gwjob.request_cpus = 0
    gwjob.request_memory = 0
    gwjob.request_disk = 0
    gwjob.request_walltime = 0

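    # Aggregate resource requests over the quanta in the cluster: cpus and
    # memory use the maximum (quanta presumably run one at a time within the
    # job), while disk and walltime accumulate.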

    for qnode in qnodes:
        label_counts[qnode.taskDef.label] += 1

        search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label}, "required": False, "default": 0}
        _, request_cpus = config.search("requestCpus", opt=search_opt)
        gwjob.request_cpus = max(gwjob.request_cpus, int(request_cpus))
        _, request_memory = config.search("requestMemory", opt=search_opt)
        gwjob.request_memory = max(gwjob.request_memory, int(request_memory))
        _, request_disk = config.search("requestDisk", opt=search_opt)
        gwjob.request_disk += int(request_disk)
        _, request_walltime = config.search("requestWalltime", opt=search_opt)
        gwjob.request_walltime += int(request_walltime)

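    # Summarize the quanta per PipelineTask label, e.g. "isr:10;calibrate:10"
    # (labels illustrative).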

    gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])


def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["run_qgraph_file"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        generic_workflow.add_job(gwjob)

        qgraph = clustered_quanta_graph.graph["qgraph"]
        qnodes = []
        for node_id in data["qgraph_node_ids"]:
            qnodes.append(qgraph.getQuantumNodeByNodeId(node_id))
        pipetask_labels = [task.label for task in qgraph.iterTaskGraph()]
        create_job_values_universal(config, qnodes, generic_workflow, gwjob, prefix)
        create_job_values_aggregate(config, qnodes, gwjob, pipetask_labels)

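        # When configured to save per-job QuantumGraph files at transform
        # time, write this cluster's subgraph to its per-job file now.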

        if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(qgraph, generic_workflow.get_file(f"qgraphFile_{gwjob.name}").src_uri,
                             data["qgraph_node_ids"])

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)

    return generic_workflow


def add_workflow_attributes(config, generic_workflow):
    """Add workflow-level attributes to given GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Save run quanta summary and other workflow attributes to GenericWorkflow.
    run_quanta_counts = {}
    for job_name in generic_workflow:
        job = generic_workflow.get_job(job_name)
        if job.tags is not None and "quanta_summary" in job.tags:
            for job_summary_part in job.tags["quanta_summary"].split(";"):
                (label, cnt) = job_summary_part.split(":")
                if label not in run_quanta_counts:
                    run_quanta_counts[label] = 0
                run_quanta_counts[label] += int(cnt)

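    # Build the run-level summary string, e.g. "isr:10;calibrate:10"
    # (labels illustrative).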

    run_quanta_summary = []
    for label in run_quanta_counts:
        run_quanta_summary.append("%s:%d" % (label, run_quanta_counts[label]))

    generic_workflow.run_attrs.update({"bps_run_summary": ";".join(run_quanta_summary),
                                       "bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": "TODO"})


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config