Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a 

23generic workflow. 

24""" 

25 

26import logging 

27import os 

28 

29from .bps_config import BpsConfig 

30from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile 

31from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename 

32 

33 

# Module-level logger. Named after the module (instead of using the root
# logger) so that its messages can be filtered/configured independently.
_LOG = logging.getLogger(__name__)

35 

36 

def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph into a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Clustered quantum graph to turn into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow built from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the generic workflow.
    """
    # Prefer a name stored on the graph itself; otherwise the configuration
    # must provide one ("required" makes the search fail loudly if missing).
    graph_attrs = clustered_quantum_graph.graph
    name = graph_attrs['name'] if 'name' in graph_attrs else None
    if name is None:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    # Write out each job's QuantumGraph now if that is the configured moment.
    found, when_to_save_job_qgraph = config.search("whenSaveJobQgraph",
                                                   {"default": WhenToSaveQuantumGraphs.TRANSFORM})
    save_now = found and when_to_save_job_qgraph == WhenToSaveQuantumGraphs.TRANSFORM
    if save_now:
        for job_name in generic_workflow.nodes():
            job = generic_workflow.get_job(job_name)
            if job.quantum_graph is None:
                # Nothing to save for jobs without a QuantumGraph.
                continue
            qgraph_filename = create_job_quantum_graph_filename(job, prefix)
            save_qg_subgraph(job.quantum_graph, qgraph_filename)

    return generic_workflow, generic_workflow_config

73 

74 

def group_clusters_into_jobs(clustered_quanta_graph, name):
    """Group clusters of quanta into compute jobs.

    One job is created per node of the clustered graph and the graph's
    edges are copied over as job dependencies.

    Parameters
    ----------
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Graph where each node is a QuantumGraph of quanta that should be run
        inside single python execution.
    name : `str`
        Name of GenericWorkflow (typically unique by conventions).

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Skeleton of the generic workflow (job placeholders and dependencies)
    """
    generic_workflow = GenericWorkflow(name)

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        # The label is optional, so it must not be indexed unconditionally
        # (not even for the debug message).
        has_label = "label" in data
        _LOG.debug("clustered_quanta_graph: node_name=%s, type(qgraph)=%s, label=%s", node_name,
                   type(data["qgraph"]), data["label"] if has_label else None)
        job = GenericWorkflowJob(node_name)
        job.quantum_graph = data["qgraph"]
        if has_label:
            job.label = data["label"]
        generic_workflow.add_job(job)

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    return generic_workflow

109 

110 

def update_job(config, job):
    """Copy site-specific condor profile settings from the config to a job.

    Entries are read from the ``.site.<compute_site>.profile.condor``
    section of the configuration, if present.  Entries whose name starts
    with "+" are stored (without the "+") in ``job.attrs``; all other
    entries are stored in ``job.profile``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    section = f".site.{job.compute_site}.profile.condor"
    if section not in config:
        # No profile defined for this job's compute site.
        return

    for name, value in config[section].items():
        if not name.startswith("+"):
            job.profile[name] = value
            continue
        # Leading "+" marks a job attribute; strip the marker.
        job.attrs[name[1:]] = value

129 

130 

def add_workflow_init_nodes(config, generic_workflow):
    """Prepend initialization job(s) to the given workflow.

    The initialization workflow is merged into ``generic_workflow`` and
    every one of its sink nodes is connected to every node that was a
    source of ``generic_workflow`` before the merge, so that all of the
    initialization runs prior to any of the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Workflow holding the task and file nodes needed to initialize the
    # pipeline execution.
    init_workflow = create_init_workflow(config)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Nodes without predecessors in the existing workflow.  These must be
    # collected before the init nodes are merged in.
    workflow_sources = [node for node in generic_workflow
                        if generic_workflow.in_degree(node) == 0]
    _LOG.debug("workflow sources = %s", workflow_sources)

    # Nodes without successors in the init workflow.
    init_sinks = [node for node in init_workflow
                  if init_workflow.out_degree(node) == 0]
    _LOG.debug("init sinks = %s", init_sinks)

    # Merge init nodes, edges and files into the workflow ...
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    generic_workflow._files.update(init_workflow._files)

    # ... then make every former source depend on every init sink.
    for former_source in workflow_sources:
        for init_sink in init_sinks:
            generic_workflow.add_edge(init_sink, former_source)

164 

165 

def create_init_workflow(config):
    """Build the workflow that runs the initialization job(s).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow
    """
    _LOG.debug("creating init subgraph")
    init_workflow = GenericWorkflow("init")

    # The run-level QuantumGraph file is both part of the command line and
    # the job's single input file.
    run_qgraph_file = config[".bps_defined.run_qgraph_file"]

    # Job for executing --init-only.
    init_job = GenericWorkflowJob("pipetaskInit")
    init_job.cmdline = create_command(config, "pipetaskInit", run_qgraph_file)
    init_job.label = "init"
    init_job.compute_site = config["computeSite"]

    # Resource requests fall back to 0 when not configured.
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": 0}
    resource_keys = (("request_cpus", "requestCpus"),
                     ("request_memory", "requestMemory"),
                     ("request_disk", "requestDisk"),
                     ("request_walltime", "requestWalltime"))
    for attr_name, config_key in resource_keys:
        _, value = config.search(config_key, opt=search_opt)
        setattr(init_job, attr_name, int(value))

    update_job(config, init_job)
    init_workflow.add_job(init_job)

    _LOG.debug("creating init task input(s)")
    qgraph_input = GenericWorkflowFile(os.path.basename(run_qgraph_file),
                                       wms_transfer=True, src_uri=run_qgraph_file)
    init_workflow.add_job_inputs(init_job.name, qgraph_input)

    # All outputs (config, software versions, etc) go to Butler.
    # Currently no need to add them to job.

    return init_workflow

204 

205 

def create_command(config, label, qgraph_file):
    """Create command line string for job.

    The command is taken from ``runQuantumCommand`` if it is found in the
    configuration.  Otherwise the older, separate ``runQuantumExec`` and
    ``runQuantumArgs`` values are joined with a single space.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    label : `str`
        Pipeline Task label used as key into config.
    qgraph_file : `str`
        Name of file that will contain the QuantumGraph.

    Returns
    -------
    command : `str`
        String containing command line.

    Raises
    ------
    RuntimeError
        Raised if neither ``runQuantumCommand`` nor the complete
        ``runQuantumExec``/``runQuantumArgs`` pair can be found.
    """
    search_opt = {"curvals": {"curr_pipetask": label, "qgraphFile": qgraph_file}, "required": False}
    found, command = config.search("runQuantumCommand", opt=search_opt)
    if found:
        return command

    # Allow older Exec Args separation.  Both parts are tracked separately
    # so that a missing executable is not masked by the arguments being
    # present.
    exec_found, exec_name = config.search("runQuantumExec", opt=search_opt)
    args_found, exec_args = config.search("runQuantumArgs", opt=search_opt)
    if not (exec_found and args_found):
        raise RuntimeError(f"Could not find command to execute for {label}.")

    return f"{exec_name} {exec_args}"

234 

235 

def create_job_values_universal(config, generic_workflow, prefix):
    """Create job values.  Must be same value for every PipelineTask in
    QuantumGraph.

    For every job this records the job's QuantumGraph file as a job input
    and sets the job's command line and compute site, after checking that
    all quanta in the job agree on them.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        Raised if the quanta of a single job do not all resolve to the same
        command line or to the same compute site.
    """
    for job_name, data in generic_workflow.nodes(data=True):
        generic_workflow_job = data["job"]

        # Register the job's QuantumGraph file as an input of the job.
        data["qgraph_file"] = create_job_quantum_graph_filename(generic_workflow_job, prefix)
        file_ = GenericWorkflowFile(os.path.basename(data["qgraph_file"]), wms_transfer=True,
                                    src_uri=data["qgraph_file"])
        generic_workflow.add_job_inputs(job_name, file_)

        # Verify workflow config values are same for all nodes in QuantumGraph
        # for running the Quantum and compute_site.
        job_command = None
        job_compute_site = None
        for qnode in generic_workflow_job.quantum_graph:
            task_def = qnode.taskDef
            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            command = create_command(config, task_def.label, data["qgraph_file"])
            if job_command is None:
                job_command = command
            elif job_command != command:
                _LOG.error("Inconsistent command to run QuantumGraph\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster command: %s\n"
                           "Inconsistent command: %s",
                           job_name, qnode.nodeId.number, job_command, command)
                raise RuntimeError("Inconsistent run QuantumGraph command")

            search_opt = {"curvals": {"curr_pipetask": task_def.label}, "required": False}
            _, compute_site = config.search("computeSite", opt=search_opt)
            if job_compute_site is None:
                job_compute_site = compute_site
            elif job_compute_site != compute_site:
                _LOG.error("Inconsistent compute_site\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster compute_site: %s\n"
                           "Inconsistent compute_site: %s",
                           job_name, qnode.nodeId.number, job_compute_site, compute_site)
                # The message names the value that actually disagreed (it
                # used to repeat the command-line message).
                raise RuntimeError("Inconsistent compute_site")

        generic_workflow_job.cmdline = job_command
        if job_compute_site is not None:
            generic_workflow_job.compute_site = job_compute_site
        update_job(config, generic_workflow_job)

294 

295 

def create_job_values_aggregate(config, generic_workflow):
    """Set job values that aggregate the values of the PipelineTasks in the
    job's QuantumGraph.

    For each job, the requested cpus and memory are the maximum over its
    quanta, the requested disk and walltime are the sum over its quanta, and
    ``quanta_summary`` is a ";"-separated list of "label:count" entries.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    """
    for _, node_data in generic_workflow.nodes(data=True):
        job = node_data["job"]

        # One counter per task label, in the order given by the task graph.
        quanta_per_label = dict.fromkeys(
            (task_def.label for task_def in job.quantum_graph.iterTaskGraph()), 0)

        job.request_cpus = job.request_memory = job.request_disk = job.request_walltime = 0

        for qnode in job.quantum_graph:  # Assumes ordering
            task_label = qnode.taskDef.label
            quanta_per_label[task_label] += 1

            # Unconfigured requests count as 0.
            search_opt = {"curvals": {"curr_pipetask": task_label}, "required": False, "default": 0}

            # Peak requirements: keep the largest request seen.
            cpus = config.search("requestCpus", opt=search_opt)[1]
            job.request_cpus = max(job.request_cpus, int(cpus))
            memory = config.search("requestMemory", opt=search_opt)[1]
            job.request_memory = max(job.request_memory, int(memory))

            # Cumulative requirements: add up the requests.
            disk = config.search("requestDisk", opt=search_opt)[1]
            job.request_disk = job.request_disk + int(disk)
            walltime = config.search("requestWalltime", opt=search_opt)[1]
            job.request_walltime = job.request_walltime + int(walltime)

        summary_parts = [f"{task_label}:{count}" for task_label, count in quanta_per_label.items()]
        job.quanta_summary = ';'.join(summary_parts)

334 

335 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow with job values and workflow attributes filled in
        and, if ``runInit`` is set in the config, with initialization
        job(s) added.
    """
    generic_workflow = group_clusters_into_jobs(clustered_quanta_graph, name)
    create_job_values_universal(config, generic_workflow, prefix)
    create_job_values_aggregate(config, generic_workflow)

    # The fallback must be a real False: the previous fallback was a
    # non-empty string, which is always truthy.
    if config.get("runInit", False):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)
    return generic_workflow

360 

361 

def add_workflow_attributes(config, generic_workflow):
    """Add workflow-level attributes to given GenericWorkflow.

    The per-job quanta summaries ("label:count" entries separated by ";")
    are added up per label into a run-wide summary, which is stored together
    with values from the config in ``generic_workflow.run_attrs``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Total number of quanta per label over all jobs of the run.
    totals = {}
    for job_name in generic_workflow:
        job_summary = generic_workflow.get_job(job_name).quanta_summary
        if not job_summary:
            # Job carries no summary; nothing to add.
            continue
        for entry in job_summary.split(';'):
            label, count = entry.split(':')
            totals[label] = totals.get(label, 0) + int(count)

    run_summary = ";".join(f"{label}:{total}" for label, total in totals.items())

    workflow_attrs = {
        "bps_run_summary": run_summary,
        "bps_isjob": "True",
        "bps_project": config["project"],
        "bps_campaign": config["campaign"],
        "bps_run": generic_workflow.name,
        "bps_operator": config["operator"],
        "bps_payload": config["payloadName"],
        "bps_runsite": "TODO",
    }
    generic_workflow.run_attrs.update(workflow_attrs)

395 

396 

def create_generic_workflow_config(config, prefix):
    """Create the configuration that accompanies a generic workflow.

    A new config is built from ``config`` and extended with the workflow
    name (the config's ``uniqProcName``) and the workflow path (``prefix``).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    workflow_config = BpsConfig(config)

    # Workflow-specific additions on top of the values taken from config.
    workflow_name = config["uniqProcName"]
    workflow_config["workflowName"] = workflow_name
    workflow_config["workflowPath"] = prefix

    return workflow_config