Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Driver for the transformation of a QuantumGraph into a 

23generic workflow. 

24""" 

25 

26import logging 

27import os 

28 

29from .bps_config import BpsConfig 

30from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile 

31from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename 

32 

33 

# Use a module-scoped logger (instead of the root logger) so that log records
# carry this module's name and can be filtered per module.
_LOG = logging.getLogger(__name__)

35 

36 

def transform(config, clustered_quantum_graph, prefix):
    """Convert a ClusteredQuantumGraph into a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Clustered quantum graph to turn into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow built from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the generic workflow.
    """
    # Prefer a name stored on the graph itself; otherwise fall back to the
    # (required) unique process name from the configuration.
    graph_attrs = clustered_quantum_graph.graph
    if 'name' not in graph_attrs or graph_attrs['name'] is None:
        _, name = config.search('uniqProcName', opt={'required': True})
    else:
        name = graph_attrs['name']

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    # Write out the per-job QuantumGraphs now if the configuration asks for
    # them to be saved during the transform stage.
    save_opt = {'default': WhenToSaveQuantumGraphs.TRANSFORM}
    found, when_to_save = config.search('when_save_job_qgraph', save_opt)
    if found and when_to_save == WhenToSaveQuantumGraphs.TRANSFORM:
        for job_name in generic_workflow.nodes():
            job = generic_workflow.get_job(job_name)
            if job.quantum_graph is None:
                continue
            qgraph_filename = create_job_quantum_graph_filename(job, prefix)
            save_qg_subgraph(job.quantum_graph, qgraph_filename)

    return generic_workflow, generic_workflow_config

73 

74 

def group_clusters_into_jobs(clustered_quanta_graph, name):
    """Group clusters of quanta into compute jobs.

    One job is created per node of the clustered graph, and one job
    relationship is added per edge (parent node to each of its successors).

    Parameters
    ----------
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Graph where each node is a QuantumGraph of quanta that should be run
        inside single python execution.  Each node's data must contain a
        ``"qgraph"`` entry; a ``"label"`` entry is optional.
    name : `str`
        Name of GenericWorkflow (typically unique by conventions).

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Skeleton of the generic workflow (job placeholders and dependencies)
    """
    generic_workflow = GenericWorkflow(name)

    for node_name, data in clustered_quanta_graph.nodes(data=True):
        # The label is optional, so do not index it unconditionally just to
        # log it; a node without a label must not raise KeyError here.
        label_for_log = data["label"] if "label" in data else None
        _LOG.debug("clustered_quanta_graph: node_name=%s, type(qgraph)=%s, label=%s", node_name,
                   type(data["qgraph"]), label_for_log)
        job = GenericWorkflowJob(node_name)
        job.quantum_graph = data["qgraph"]
        if "label" in data:
            job.label = data["label"]
        generic_workflow.add_job(job)

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    return generic_workflow

109 

110 

def update_job(config, job):
    """Copy site-specific condor profile settings onto the given job.

    The settings are read from the ``.site.<compute_site>.profile.condor``
    section of the configuration.  If that section does not exist the job is
    left untouched.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    section_key = f".site.{job.compute_site}.profile.condor"
    if section_key not in config:
        return

    for setting, value in config[section_key].items():
        if setting.startswith("+"):
            # A leading "+" marks a job attribute; it is stored without
            # the "+" prefix.
            job.attrs[setting[1:]] = value
        else:
            job.profile[setting] = value

129 

130 

def add_workflow_init_nodes(config, generic_workflow):
    """Prepend the initialization job(s) to the given workflow.

    Every sink of the initialization workflow is connected to every source
    of the existing workflow, so that all initialization runs before any of
    the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
        It is modified in place.
    """
    # Build the workflow holding the task and file nodes needed to
    # initialize the pipeline execution.
    init_workflow = create_init_workflow(config)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Sources (no incoming edges) of the existing workflow.  These must be
    # collected before the init nodes are merged in.
    workflow_sources = [node for node in generic_workflow
                        if generic_workflow.in_degree(node) == 0]
    _LOG.debug("workflow sources = %s", workflow_sources)

    # Sinks (no outgoing edges) of the init workflow.
    init_sinks = [node for node in init_workflow
                  if init_workflow.out_degree(node) == 0]
    _LOG.debug("init sinks = %s", init_sinks)

    # Merge the init workflow (nodes, edges and files) into the main one.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    generic_workflow._files.update(init_workflow._files)

    # Make every original source depend on every init sink.
    for source in workflow_sources:
        for sink in init_sinks:
            generic_workflow.add_edge(sink, source)

164 

165 

def create_init_workflow(config):
    """Build the workflow that runs the initialization job.

    The workflow contains a single ``pipetaskInit`` job whose only declared
    input is the run QuantumGraph file.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow
    """
    _LOG.debug("creating init subgraph")
    init_workflow = GenericWorkflow("init")
    run_qgraph_file = config[".bps_defined.run_qgraph_file"]

    # Job that executes the --init-only step.
    init_job = GenericWorkflowJob("pipetaskInit")
    init_job.cmdline = create_command(config, "pipetaskInit", run_qgraph_file)
    init_job.label = "init"
    init_job.compute_site = config["computeSite"]
    init_workflow.add_job(init_job)

    _LOG.debug("creating init task input(s)")
    qgraph_input = GenericWorkflowFile(os.path.basename(run_qgraph_file),
                                       wms_transfer=True, src_uri=run_qgraph_file)
    init_workflow.add_job_inputs(init_job.name, qgraph_input)

    # All outputs (config, software versions, etc) go to Butler, so there
    # is currently no need to add them to the job.
    return init_workflow

198 

199 

def create_command(config, label, qgraph_file):
    """Create command line string for job.

    The ``runQuantumCommand`` setting is used when present.  Otherwise the
    older split form is used, joining ``runQuantumExec`` and
    ``runQuantumArgs`` with a single space.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    label : `str`
        Pipeline Task label used as key into config.
    qgraph_file : `str`
        Name of file that will contain the QuantumGraph.

    Returns
    -------
    command : `str`
        String containing command line.

    Raises
    ------
    RuntimeError
        Raised if neither ``runQuantumCommand`` nor the complete
        ``runQuantumExec``/``runQuantumArgs`` pair can be found.
    """
    search_opt = {"curvals": {"curr_pipetask": label, "qgraphFile": qgraph_file}, "required": False}
    found, command = config.search("runQuantumCommand", opt=search_opt)
    if found:
        return command

    # Allow older Exec Args separation.  Track the two lookups separately so
    # that a missing executable is not masked by the arguments being found.
    exec_found, exec_name = config.search("runQuantumExec", opt=search_opt)
    args_found, exec_args = config.search("runQuantumArgs", opt=search_opt)
    if not (exec_found and args_found):
        raise RuntimeError(f"Could not find command to execute for {label}.")

    return f"{exec_name} {exec_args}"

228 

229 

def create_job_values_universal(config, generic_workflow, prefix):
    """Create job values. Must be same value for every PipelineTask in
    QuantumGraph.

    For each job this registers the job's QuantumGraph file as a job input,
    sets the job's command line and (when one is configured) its compute
    site, and finally calls `update_job` on the job.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        Raised if the quanta of a single job do not all resolve to the same
        command line or to the same compute site, or if no command can be
        found for a task label.
    """
    for job_name, data in generic_workflow.nodes(data=True):
        generic_workflow_job = data["job"]

        # Record the job's QuantumGraph file and declare it as a job input.
        data["qgraph_file"] = create_job_quantum_graph_filename(generic_workflow_job, prefix)
        file_ = GenericWorkflowFile(os.path.basename(data["qgraph_file"]), wms_transfer=True,
                                    src_uri=data["qgraph_file"])
        generic_workflow.add_job_inputs(job_name, file_)

        # Verify workflow config values are same for all nodes in QuantumGraph
        # for running the Quantum and compute_site
        job_command = None
        job_compute_site = None
        for qnode in generic_workflow_job.quantum_graph:
            task_def = qnode.taskDef
            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            command = create_command(config, task_def.label, data["qgraph_file"])
            if job_command is None:
                job_command = command
            elif job_command != command:
                _LOG.error("Inconsistent command to run QuantumGraph\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster command: %s\n"
                           "Inconsistent command: %s",
                           job_name, qnode.nodeId.number, job_command, command)
                raise RuntimeError("Inconsistent run QuantumGraph command")

            search_opt = {"curvals": {"curr_pipetask": task_def.label}, "required": False}
            _, compute_site = config.search("computeSite", opt=search_opt)
            if job_compute_site is None:
                job_compute_site = compute_site
            elif job_compute_site != compute_site:
                _LOG.error("Inconsistent compute_site\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster compute_site: %s\n"
                           "Inconsistent compute_site: %s",
                           job_name, qnode.nodeId.number, job_compute_site, compute_site)
                # Report the actual problem (compute site, not command).
                raise RuntimeError("Inconsistent compute_site")

        generic_workflow_job.cmdline = job_command
        if job_compute_site is not None:
            generic_workflow_job.compute_site = job_compute_site
        update_job(config, generic_workflow_job)

288 

289 

def create_job_values_aggregate(config, generic_workflow):
    """Create job values that are aggregate of values from PipelineTasks
    in QuantumGraph.

    For every job, the resource requests are combined over all quanta in the
    job's QuantumGraph: ``request_cpus`` and ``request_memory`` take the
    maximum, ``request_disk`` and ``request_walltime`` take the sum.  A
    setting that is not configured contributes 0.  The job's
    ``quanta_summary`` is set to a ``;``-separated list of
    ``label:count`` pairs.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    """
    for _, data in generic_workflow.nodes(data=True):
        job = data["job"]

        # Start every task label of the job's graph at a count of zero.
        pipeline_labels = [task.label for task in job.quantum_graph.iterTaskGraph()]
        label_counts = dict.fromkeys(pipeline_labels, 0)

        job.request_cpus = 0
        job.request_memory = 0
        job.request_disk = 0
        job.request_walltime = 0

        for qnode in job.quantum_graph:  # Assumes ordering
            task_label = qnode.taskDef.label
            label_counts[task_label] += 1

            search_opt = {"curvals": {"curr_pipetask": task_label}, "required": False, "default": 0}
            _, request_cpus = config.search("request_cpus", opt=search_opt)
            job.request_cpus = max(job.request_cpus, request_cpus)
            _, request_memory = config.search("request_memory", opt=search_opt)
            job.request_memory = max(job.request_memory, request_memory)
            # Disk must be looked up under its own key (not request_walltime).
            _, request_disk = config.search("request_disk", opt=search_opt)
            job.request_disk += request_disk
            _, request_walltime = config.search("request_walltime", opt=search_opt)
            job.request_walltime += request_walltime

        job.quanta_summary = ';'.join([f"{k}:{v}" for k, v in label_counts.items()])

328 

329 

def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow with job values, optional initialization job(s),
        and workflow-level attributes filled in.
    """
    generic_workflow = group_clusters_into_jobs(clustered_quanta_graph, name)
    create_job_values_universal(config, generic_workflow, prefix)
    create_job_values_aggregate(config, generic_workflow)

    # Look the flag up with a real boolean default.  The previous default was
    # the non-empty string "{default: False}", which is always truthy and so
    # enabled the init job(s) whenever runInit was not configured.
    _, run_init = config.search("runInit", opt={"default": False})
    if run_init:
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)
    return generic_workflow

354 

355 

def add_workflow_attributes(config, generic_workflow):
    """Attach workflow-level run attributes to the given GenericWorkflow.

    The per-job quanta summaries (``label:count`` pairs separated by ``;``)
    are summed per label into a single run summary, which is stored in the
    workflow's ``run_attrs`` together with several values taken from the
    configuration.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Accumulate the number of quanta per label across all jobs.
    totals = {}
    for job_name in generic_workflow:
        summary = generic_workflow.get_job(job_name).quanta_summary
        if not summary:
            continue
        for part in summary.split(';'):
            label, count = part.split(':')
            totals[label] = totals.get(label, 0) + int(count)

    summary_text = ";".join(["%s:%d" % entry for entry in totals.items()])

    run_attrs = {
        "bps_run_summary": summary_text,
        "bps_isjob": "True",
        "bps_project": config["project"],
        "bps_campaign": config["campaign"],
        "bps_run": generic_workflow.name,
        "bps_operator": config["operator"],
        "bps_payload": config["payloadName"],
        "bps_runsite": "TODO",
    }
    generic_workflow.run_attrs.update(run_attrs)

389 

390 

def create_generic_workflow_config(config, prefix):
    """Build the configuration that accompanies a GenericWorkflow.

    A new `BpsConfig` is constructed from the given one, then
    ``workflowName`` is set to the configured ``uniqProcName`` and
    ``workflowPath`` to ``prefix``.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    workflow_config = BpsConfig(config)
    workflow_name = config["uniqProcName"]
    workflow_config["workflowName"] = workflow_name
    workflow_config["workflowPath"] = prefix
    return workflow_config

410 return generic_workflow_config