Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a 

23generic workflow. 

24""" 

25 

26import logging 

27import os 

28 

29from .bps_config import BpsConfig 

30from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile 

31from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename 

32 

33 

# Use a logger named after this module instead of the root logger so that
# messages are attributed to this module and can be filtered on their own.
_LOG = logging.getLogger(__name__)

35 

36 

def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the generic workflow.
    """
    # Prefer a name stored on the graph itself; otherwise the config must
    # provide one.
    if "name" in clustered_quantum_graph.graph and clustered_quantum_graph.graph["name"] is not None:
        name = clustered_quantum_graph.graph["name"]
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    # Save QuantumGraphs.
    _, when_save = config.search("whenSaveJobQgraph",
                                 opt={"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        qgraph = clustered_quantum_graph.graph["qgraph"]
        for job_name in generic_workflow.nodes():
            job = generic_workflow.get_job(job_name)
            # Jobs without quantum node ids have no subgraph to write.
            if job.qgraph_node_ids is not None:
                save_qg_subgraph(qgraph,
                                 create_job_quantum_graph_filename(job, prefix),
                                 job.qgraph_node_ids)

    return generic_workflow, generic_workflow_config

73 

74 

def group_clusters_into_jobs(clustered_quanta_graph, name):
    """Group clusters of quanta into compute jobs.

    One job is created per node of the clustered graph, and one job
    relationship is added per (node, successor) pair.

    Parameters
    ----------
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Graph where each node is a QuantumGraph of quanta that should be run
        inside single python execution.
    name : `str`
        Name of GenericWorkflow (typically unique by conventions).

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Skeleton of the generic workflow (job placeholders and dependencies)
    """
    generic_workflow = GenericWorkflow(name)

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        node_ids = data["qgraph_node_ids"]
        # "label" is optional (see below), so it must not be indexed directly
        # just to produce a debug message.
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(node_ids), data.get("label"), node_ids[:4])
        job = GenericWorkflowJob(node_name)
        job.qgraph_node_ids = node_ids
        if "tags" in data:
            job.tags = data["tags"]
        if "label" in data:
            job.label = data["label"]
        generic_workflow.add_job(job)

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    return generic_workflow

111 

112 

def update_job(config, job):
    """Copy site-specific condor profile settings from the config to a job.

    Looks up the ``.site.<compute_site>.profile.condor`` entry for the job's
    compute site.  Entries whose name starts with ``+`` are stored in the
    job's attributes (without the ``+``); all others are stored in the job's
    profile.  Nothing is changed if the config has no such entry.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    site_key = f".site.{job.compute_site}.profile.condor"
    if site_key not in config:
        return

    for name, value in config[site_key].items():
        if name.startswith("+"):
            # Leading "+" marks a job attribute; drop the marker when storing.
            job.attrs[name[1:]] = value
        else:
            job.profile[name] = value

131 

132 

def add_workflow_init_nodes(config, generic_workflow):
    """Prepend the initialization workflow to the given workflow.

    Every job of the initialization workflow is merged into
    ``generic_workflow`` and each sink of the initialization workflow is
    connected to each job that had no predecessors before the merge, so
    that all initialization runs before any of the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Workflow holding the job(s) and file(s) needed to initialize the
    # pipeline execution.
    init_workflow = create_init_workflow(config)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Jobs of the existing workflow that currently have no predecessors.
    # Must be collected before the init nodes are merged in.
    workflow_sources = []
    for node in generic_workflow:
        if generic_workflow.in_degree(node) == 0:
            workflow_sources.append(node)
    _LOG.debug("workflow sources = %s", workflow_sources)

    # Jobs of the init workflow that have no successors.
    init_sinks = []
    for node in init_workflow:
        if init_workflow.out_degree(node) == 0:
            init_sinks.append(node)
    _LOG.debug("init sinks = %s", init_sinks)

    # Merge the init workflow (nodes, edges, files) into the main workflow.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    generic_workflow._files.update(init_workflow._files)

    # Make every former source depend on every init sink.
    new_edges = [(sink, source) for source in workflow_sources for sink in init_sinks]
    for sink, source in new_edges:
        generic_workflow.add_edge(sink, source)

166 

167 

def create_init_workflow(config):
    """Build the workflow holding the ``pipetaskInit`` initialization job.

    The workflow is named ``init`` and contains a single job whose only
    input is the run QuantumGraph file named by ``config["run_qgraph_file"]``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": False}
    _, use_shared = config.search("bpsUseShared", opt=search_opt)

    # The WMS only needs to transfer the file when no shared storage is used.
    run_qgraph_uri = config["run_qgraph_file"]
    gwfile = GenericWorkflowFile(os.path.basename(run_qgraph_uri),
                                 wms_transfer=not use_shared,
                                 src_uri=run_qgraph_uri)

    init_workflow = GenericWorkflow("init")

    # Job that executes --init-only.
    job = GenericWorkflowJob("pipetaskInit")
    job.label = "pipetaskInit"
    job.compute_site = config["computeSite"]

    # Resource requests fall back to 0 when not configured.
    search_opt["default"] = 0
    resource_settings = (
        ("request_cpus", "requestCpus"),
        ("request_memory", "requestMemory"),
        ("request_disk", "requestDisk"),
        ("request_walltime", "requestWalltime"),
    )
    for attr_name, config_key in resource_settings:
        _, value = config.search(config_key, opt=search_opt)
        setattr(job, attr_name, int(value))

    update_job(config, job)
    create_command(config, job, gwfile)
    init_workflow.add_job(job)

    # All outputs (config, software versions, etc) go to Butler.
    # Currently no need to add them to job.
    init_workflow.add_job_inputs(job.name, gwfile)

    return init_workflow

209 

210 

def create_command(config, gwjob, gwfile):
    """Set the job's command line from the ``runQuantumCommand`` config value.

    The config search is given the job label, the QuantumGraph file name
    (base name only when the WMS transfers the file, otherwise the full
    source URI) and, when the job has quantum node ids, the build id of the
    first node plus a comma-separated list of all node numbers.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job for which to create command line.
    gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
        File that will contain the QuantumGraph.
    """
    curvals = {"curr_pipetask": gwjob.label}
    curvals["qgraphFile"] = os.path.basename(gwfile.src_uri) if gwfile.wms_transfer else gwfile.src_uri

    node_ids = gwjob.qgraph_node_ids
    if node_ids:
        curvals["qgraphId"] = node_ids[0].buildId
        curvals["qgraphNodeId"] = ",".join(f"{node_id.number}" for node_id in node_ids)

    _, cmdline = config.search("runQuantumCommand", opt={"curvals": curvals, "required": False})
    gwjob.cmdline = cmdline

235 

236 

def create_job_values_universal(config, qgraph, generic_workflow, prefix):
    """Create job values.  Must be same value for every PipelineTask in
    QuantumGraph.

    For every job, the run command, compute site and ``bpsUseShared``
    setting are looked up for each of the job's quanta and must agree.
    The job then gets its QuantumGraph input file, command line, compute
    site and site-specific attribute/profile values.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Full run QuantumGraph.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        Raised if the quanta of a single job do not agree on the run
        command, the compute site, or the ``bpsUseShared`` value.
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph",
                                 opt={"default": WhenToSaveQuantumGraphs.TRANSFORM.name})

    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # No per-job files will exist, so every job reads the full run
        # QuantumGraph file instead.
        per_job_qgraph_file = False
        run_qgraph_gwfile = GenericWorkflowFile(os.path.basename(config["run_qgraph_file"]),
                                                src_uri=config["run_qgraph_file"])

    # Verify workflow config values are same for all nodes in QuantumGraph
    # for running the Quantum and compute_site.
    for job_name, data in generic_workflow.nodes(data=True):
        generic_workflow_job = data["job"]
        job_command = None
        job_compute_site = None
        job_use_shared = None  # Cannot set default or can get conflict on first Quantum.
        for node_id in generic_workflow_job.qgraph_node_ids:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            task_def = qnode.taskDef
            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            search_opt = {"curvals": {"curr_pipetask": task_def.label}, "required": False}

            _, command = config.search("runQuantumCommand", opt=search_opt)
            if job_command is None:
                job_command = command
            elif job_command != command:
                _LOG.error("Inconsistent command to run QuantumGraph\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster command: %s\n"
                           "Inconsistent command: %s",
                           job_name, qnode.nodeId.number, job_command, command)
                raise RuntimeError("Inconsistent run QuantumGraph command")

            _, compute_site = config.search("computeSite", opt=search_opt)
            if job_compute_site is None:
                job_compute_site = compute_site
            elif job_compute_site != compute_site:
                _LOG.error("Inconsistent compute_site\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster compute_site: %s\n"
                           "Inconsistent compute_site: %s",
                           job_name, qnode.nodeId.number, job_compute_site, compute_site)
                # Report the setting that actually conflicted (previously
                # this reused the run-command message).
                raise RuntimeError("Inconsistent compute_site value within cluster.")

            _, use_shared = config.search("bpsUseShared", opt=search_opt)
            if job_use_shared is None:
                job_use_shared = use_shared
            elif job_use_shared != use_shared:
                _LOG.error("Inconsistent bpsUseShared\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster bpsUseShared: %s\n"
                           "Inconsistent bpsUseShared: %s",
                           job_name, qnode.nodeId.number, job_use_shared, use_shared)
                raise RuntimeError("Inconsistent bpsUseShared value within cluster.")

        if per_job_qgraph_file:
            data["qgraph_file"] = create_job_quantum_graph_filename(generic_workflow_job, prefix)
            gwfile = GenericWorkflowFile(os.path.basename(data["qgraph_file"]),
                                         src_uri=data["qgraph_file"])
        else:
            data["qgraph_file"] = run_qgraph_gwfile.src_uri
            gwfile = run_qgraph_gwfile

        # Tell WMS whether to transfer QuantumGraph file.
        # NOTE(review): without per-job files every job shares the same file
        # object here, so the last job processed decides its wms_transfer
        # value -- confirm that is intended.
        gwfile.wms_transfer = not job_use_shared

        generic_workflow.add_job_inputs(job_name, gwfile)

        generic_workflow_job.cmdline = job_command
        # create_command assigns the job's cmdline again using the job label.
        create_command(config, generic_workflow_job, gwfile)
        if job_compute_site is not None:
            generic_workflow_job.compute_site = job_compute_site
        update_job(config, generic_workflow_job)

328 

329 

def create_job_values_aggregate(config, qgraph, generic_workflow):
    """Create job values that are aggregate of values from PipelineTasks
    in QuantumGraph.

    For each job the requested CPUs and memory are the maximum over the
    job's quanta, the requested disk and walltime are the sum over the
    job's quanta, and ``quanta_summary`` is a ``;``-separated list of
    ``label:count`` pairs for the task labels present in the job.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Full run QuantumGraph.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    """
    # The task labels depend only on the QuantumGraph, so gather them once
    # rather than once per job.
    pipeline_labels = [task.label for task in qgraph.iterTaskGraph()]

    for _, data in generic_workflow.nodes(data=True):
        job = data["job"]

        # Fresh per-job counters, in pipeline label order.
        label_counts = dict.fromkeys(pipeline_labels, 0)

        job.request_cpus = 0
        job.request_memory = 0
        job.request_disk = 0
        job.request_walltime = 0

        for node_id in job.qgraph_node_ids:  # Assumes ordering.
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label = qnode.taskDef.label
            label_counts[label] += 1

            search_opt = {"curvals": {"curr_pipetask": label}, "required": False, "default": 0}
            _, request_cpus = config.search("requestCpus", opt=search_opt)
            job.request_cpus = max(job.request_cpus, int(request_cpus))
            _, request_memory = config.search("requestMemory", opt=search_opt)
            job.request_memory = max(job.request_memory, int(request_memory))
            _, request_disk = config.search("requestDisk", opt=search_opt)
            job.request_disk += int(request_disk)
            _, request_walltime = config.search("requestWalltime", opt=search_opt)
            job.request_walltime += int(request_walltime)

        # Only labels that actually occur in this job are reported.
        job.quanta_summary = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])

371 

372 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow with job values, optional initialization job(s),
        and workflow-level attributes filled in.
    """
    qgraph = clustered_quanta_graph.graph["qgraph"]

    generic_workflow = group_clusters_into_jobs(clustered_quanta_graph, name)
    create_job_values_universal(config, qgraph, generic_workflow, prefix)
    create_job_values_aggregate(config, qgraph, generic_workflow)

    # The fallback must be a real False: the previous fallback was the
    # non-empty string "{default: False}", which is always truthy and so
    # added init nodes whenever "runInit" was not configured.
    if config.get("runInit", False):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)
    return generic_workflow

397 

398 

def add_workflow_attributes(config, generic_workflow):
    """Attach run-level attributes to the given GenericWorkflow.

    The per-job ``quanta_summary`` strings (``label:count`` pairs joined by
    ``;``) are totalled per label into a run summary, which is stored in
    the workflow's ``run_attrs`` together with project, campaign, operator
    and payload values taken from the config.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Total number of quanta per label across all jobs.
    totals = {}
    for job_name in generic_workflow:
        summary = generic_workflow.get_job(job_name).quanta_summary
        if not summary:
            # Job carries no quanta summary; nothing to count.
            continue
        for part in summary.split(";"):
            label, count = part.split(":")
            totals[label] = totals.get(label, 0) + int(count)

    run_summary = ";".join(f"{label}:{total}" for label, total in totals.items())

    run_attrs = {
        "bps_run_summary": run_summary,
        "bps_isjob": "True",
        "bps_project": config["project"],
        "bps_campaign": config["campaign"],
        "bps_run": generic_workflow.name,
        "bps_operator": config["operator"],
        "bps_payload": config["payloadName"],
        "bps_runsite": "TODO",
    }
    generic_workflow.run_attrs.update(run_attrs)

432 

433 

def create_generic_workflow_config(config, prefix):
    """Build the configuration that accompanies a generic workflow.

    A new ``BpsConfig`` is constructed from ``config`` and given the
    workflow name (the ``uniqProcName`` config value) and the workflow path
    (``prefix``).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    workflow_config = BpsConfig(config)
    workflow_name = config["uniqProcName"]
    workflow_config["workflowName"] = workflow_name
    workflow_config["workflowPath"] = prefix
    return workflow_config