Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a 

23generic workflow. 

24""" 

25 

26import logging 

27import os 

28 

29from .bps_config import BpsConfig 

30from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile 

31from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename 

32 

33 

# Module-level logger named after this module, so that its records can be
# filtered and configured separately instead of being emitted by the root
# logger.
_LOG = logging.getLogger(__name__)

35 

36 

def transform(config, clustered_quantum_graph, prefix):
    """Convert a ClusteredQuantumGraph into a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Clustered quantum graph that is turned into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow built from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the generic workflow.
    """
    graph_attrs = clustered_quantum_graph.graph
    # Prefer a name stored on the graph itself; otherwise the name has to
    # come from the (required) "uniqProcName" configuration value.
    if "name" not in graph_attrs or graph_attrs["name"] is None:
        _, name = config.search("uniqProcName", opt={"required": True})
    else:
        name = graph_attrs["name"]

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    # Save QuantumGraphs of the individual jobs when the configuration asks
    # for that to happen during the transform stage.
    found, when_to_save = config.search("whenSaveJobQgraph",
                                        {"default": WhenToSaveQuantumGraphs.TRANSFORM})
    save_now = found and when_to_save == WhenToSaveQuantumGraphs.TRANSFORM
    if save_now:
        for job_name in generic_workflow.nodes():
            job = generic_workflow.get_job(job_name)
            if job.quantum_graph is None:
                continue
            save_qg_subgraph(job.quantum_graph, create_job_quantum_graph_filename(job, prefix))

    return generic_workflow, generic_workflow_config

73 

74 

def group_clusters_into_jobs(clustered_quanta_graph, name):
    """Group clusters of quanta into compute jobs.

    One job is created per node of the clustered graph and the edges of the
    clustered graph become job dependencies.

    Parameters
    ----------
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Graph where each node is a QuantumGraph of quanta that should be run
        inside single python execution.  Every node must carry a "qgraph"
        attribute; the "label" attribute is optional.
    name : `str`
        Name of GenericWorkflow (typically unique by conventions).

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Skeleton of the generic workflow (job placeholders and dependencies)
    """
    generic_workflow = GenericWorkflow(name)

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        # "label" is optional (see the membership test below), so it must
        # not be indexed directly just to produce a debug message.
        _LOG.debug("clustered_quanta_graph: node_name=%s, type(qgraph)=%s, label=%s", node_name,
                   type(data["qgraph"]), data.get("label"))
        job = GenericWorkflowJob(node_name)
        job.quantum_graph = data["qgraph"]
        if "label" in data:
            job.label = data["label"]
        generic_workflow.add_job(job)

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    return generic_workflow

109 

110 

def update_job(config, job):
    """Copy site-specific condor settings from the config onto a job.

    The settings are looked up under ``.site.<compute_site>.profile.condor``.
    Nothing is changed when the configuration has no such entry.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job whose ``attrs`` and ``profile`` mappings are updated in place.
    """
    site_key = f".site.{job.compute_site}.profile.condor"
    if site_key not in config:
        return

    for setting, value in config[site_key].items():
        # A leading "+" marks a job attribute (stored without the "+");
        # every other setting is a profile value.
        if not setting.startswith("+"):
            job.profile[setting] = value
        else:
            job.attrs[setting[1:]] = value

129 

130 

def add_workflow_init_nodes(config, generic_workflow):
    """Prepend the initialization job(s) to the given workflow.

    Every node of the initialization workflow that has no successor is
    connected to every node of the given workflow that has no predecessor,
    so all initialization runs before any of the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
        It is modified in place.
    """
    # Workflow holding the job(s) and file(s) needed to initialize the
    # pipeline execution.
    init_workflow = create_init_workflow(config)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Nodes of the existing workflow without incoming edges.
    workflow_sources = []
    for node in generic_workflow:
        if generic_workflow.in_degree(node) == 0:
            workflow_sources.append(node)
    _LOG.debug("workflow sources = %s", workflow_sources)

    # Nodes of the init workflow without outgoing edges.
    init_sinks = []
    for node in init_workflow:
        if init_workflow.out_degree(node) == 0:
            init_sinks.append(node)
    _LOG.debug("init sinks = %s", init_sinks)

    # Merge the init workflow (nodes, edges, files) into the workflow.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    generic_workflow._files.update(init_workflow._files)

    # Make every former source depend on every init sink.
    for source in workflow_sources:
        for sink in init_sinks:
            generic_workflow.add_edge(sink, source)

164 

165 

def create_init_workflow(config):
    """Build the workflow that runs the initialization job.

    The workflow is named "init" and contains a single "pipetaskInit" job
    whose only input file is the run QuantumGraph file.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow
    """
    _LOG.debug("creating init subgraph")
    init_workflow = GenericWorkflow("init")

    # Job for executing --init-only.
    job = GenericWorkflowJob("pipetaskInit")
    run_qgraph_file = config[".bps_defined.run_qgraph_file"]
    job.cmdline = create_command(config, "pipetaskInit", run_qgraph_file)
    job.label = "init"
    job.compute_site = config["computeSite"]

    # Resource requests fall back to 0 when the config does not set them.
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": 0}
    resource_settings = (
        ("request_cpus", "requestCpus"),
        ("request_memory", "requestMemory"),
        ("request_disk", "requestDisk"),
        ("request_walltime", "requestWalltime"),
    )
    for attr_name, config_key in resource_settings:
        setattr(job, attr_name, int(config.search(config_key, opt=search_opt)[1]))

    update_job(config, job)
    init_workflow.add_job(job)

    _LOG.debug("creating init task input(s)")
    qgraph_input = GenericWorkflowFile(os.path.basename(run_qgraph_file),
                                       wms_transfer=True, src_uri=run_qgraph_file)
    init_workflow.add_job_inputs(job.name, qgraph_input)

    # All outputs (config, software versions, etc) go to Butler.
    # Currently no need to add them to job.

    return init_workflow

204 

205 

def create_command(config, label, qgraph_file):
    """Create command line string for job.

    The ``runQuantumCommand`` setting is used when present.  Otherwise the
    command is assembled from the older ``runQuantumExec`` and
    ``runQuantumArgs`` settings, both of which must then be present.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    label : `str`
        Pipeline Task label used as key into config.
    qgraph_file : `str`
        Name of file that will contain the QuantumGraph.

    Returns
    -------
    command : `str`
        String containing command line.

    Raises
    ------
    RuntimeError
        Raised if ``runQuantumCommand`` is not found and ``runQuantumExec``
        or ``runQuantumArgs`` is missing as well.
    """
    search_opt = {"curvals": {"curr_pipetask": label},
                  "required": False}

    # Temporary check until lazy command creation in DM-27009
    found, use_shared = config.search("bpsUseShared", opt=search_opt)
    if found and use_shared:
        qfile = qgraph_file
    else:
        # Without a shared filesystem only the file name is passed along.
        qfile = os.path.basename(qgraph_file)

    search_opt["curvals"]["qgraphFile"] = qfile
    found, command = config.search("runQuantumCommand", opt=search_opt)
    if found:
        return command

    # Allow older Exec Args separation.  Each lookup keeps its own flag so
    # that a missing executable is not masked by a successful args lookup.
    found_exec, exec_name = config.search("runQuantumExec", opt=search_opt)
    found_args, exec_args = config.search("runQuantumArgs", opt=search_opt)
    if not (found_exec and found_args):
        raise RuntimeError(f"Could not find command to execute for {label}.")
    return f"{exec_name} {exec_args}"

244 

245 

def create_job_values_universal(config, generic_workflow, prefix):
    """Create job values.  Must be same value for every PipelineTask in
    QuantumGraph.

    For every job this registers the job's QuantumGraph file as an input,
    and sets the job's command line and compute site after checking that
    all quanta of the job agree on them.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        Raised if the quanta of a single job do not all yield the same
        command line or the same compute site.
    """
    for job_name, data in generic_workflow.nodes(data=True):
        generic_workflow_job = data["job"]

        # The job's QuantumGraph file is an input the WMS has to transfer.
        data["qgraph_file"] = create_job_quantum_graph_filename(generic_workflow_job, prefix)
        file_ = GenericWorkflowFile(os.path.basename(data["qgraph_file"]), wms_transfer=True,
                                    src_uri=data["qgraph_file"])
        generic_workflow.add_job_inputs(job_name, file_)

        # Verify workflow config values are same for all nodes in QuantumGraph
        # for running the Quantum and compute_site
        job_command = None
        job_compute_site = None
        for qnode in generic_workflow_job.quantum_graph:
            task_def = qnode.taskDef
            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            command = create_command(config, task_def.label, data["qgraph_file"])
            if job_command is None:
                job_command = command
            elif job_command != command:
                _LOG.error("Inconsistent command to run QuantumGraph\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster command: %s\n"
                           "Inconsistent command: %s",
                           job_name, qnode.nodeId.number, job_command, command)
                raise RuntimeError("Inconsistent run QuantumGraph command")

            search_opt = {"curvals": {"curr_pipetask": task_def.label}, "required": False}
            _, compute_site = config.search("computeSite", opt=search_opt)
            if job_compute_site is None:
                job_compute_site = compute_site
            elif job_compute_site != compute_site:
                _LOG.error("Inconsistent compute_site\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster compute_site: %s\n"
                           "Inconsistent compute_site: %s",
                           job_name, qnode.nodeId.number, job_compute_site, compute_site)
                # Report the setting that actually differs instead of the
                # message belonging to the command check above.
                raise RuntimeError("Inconsistent compute_site")

        generic_workflow_job.cmdline = job_command
        # The compute site is only overwritten when the config provided one.
        if job_compute_site is not None:
            generic_workflow_job.compute_site = job_compute_site
        update_job(config, generic_workflow_job)

304 

305 

def create_job_values_aggregate(config, generic_workflow):
    """Fill in job values that are aggregated over the quanta of each job.

    The CPU and memory requests of a job are the largest values found among
    its quanta, while the disk and walltime requests are summed over its
    quanta.  A ``label:count`` summary of the quanta, joined with ";", is
    stored in the job's ``quanta_summary``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    """
    for _, node_data in generic_workflow.nodes(data=True):
        job = node_data["job"]

        # Every task label of the job's QuantumGraph starts with zero quanta.
        quanta_per_label = {task_def.label: 0 for task_def in job.quantum_graph.iterTaskGraph()}

        job.request_cpus = 0
        job.request_memory = 0
        job.request_disk = 0
        job.request_walltime = 0

        for qnode in job.quantum_graph:  # Assumes ordering
            quanta_per_label[qnode.taskDef.label] += 1

            # Values not set in the config count as 0.
            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label}, "required": False, "default": 0}

            cpus = int(config.search("requestCpus", opt=search_opt)[1])
            job.request_cpus = max(job.request_cpus, cpus)

            memory = int(config.search("requestMemory", opt=search_opt)[1])
            job.request_memory = max(job.request_memory, memory)

            disk = int(config.search("requestDisk", opt=search_opt)[1])
            job.request_disk = job.request_disk + disk

            walltime = int(config.search("requestWalltime", opt=search_opt)[1])
            job.request_walltime = job.request_walltime + walltime

        job.quanta_summary = ';'.join(f"{label}:{count}" for label, count in quanta_per_label.items())

344 

345 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow with job values and workflow attributes filled in,
        plus the initialization job(s) when ``runInit`` is enabled.
    """
    generic_workflow = group_clusters_into_jobs(clustered_quanta_graph, name)
    create_job_values_universal(config, generic_workflow, prefix)
    create_job_values_aggregate(config, generic_workflow)

    # The default has to be a real boolean: a non-empty string default is
    # always truthy and would add the init nodes even when "runInit" is not
    # set at all.
    if config.get("runInit", False):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)
    return generic_workflow

370 

371 

def add_workflow_attributes(config, generic_workflow):
    """Record run-level attributes on the given GenericWorkflow.

    The per-job quanta summaries are summed per label and stored, together
    with a few values taken from the configuration, in the workflow's
    ``run_attrs``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Total number of quanta per label over all jobs of the workflow.
    totals = {}
    for job_name in generic_workflow:
        summary = generic_workflow.get_job(job_name).quanta_summary
        if not summary:
            continue
        # A job summary looks like "label1:count1;label2:count2".
        for part in summary.split(';'):
            label, count = part.split(':')
            totals[label] = totals.get(label, 0) + int(count)

    run_summary = ";".join(f"{label}:{total}" for label, total in totals.items())

    generic_workflow.run_attrs.update({"bps_run_summary": run_summary,
                                       "bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": "TODO"})

405 

406 

def create_generic_workflow_config(config, prefix):
    """Build the configuration that accompanies a GenericWorkflow.

    A new `BpsConfig` is constructed from the given configuration and
    extended with the workflow name and the workflow path.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    workflow_config = BpsConfig(config)

    # The workflow is named after the run and rooted at the output prefix.
    workflow_name = config["uniqProcName"]
    workflow_config["workflowName"] = workflow_name
    workflow_config["workflowPath"] = prefix

    return workflow_config