Coverage for python/lsst/ctrl/bps/transform.py : 7%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Driver for the transformation of a QuantumGraph into a
23generic workflow.
24"""
26import logging
27import os
29from .bps_config import BpsConfig
30from .generic_workflow import GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile
31from .bps_utils import save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename
34_LOG = logging.getLogger()
def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the newly created generic workflow.
    """
    # Prefer a name carried on the graph itself; otherwise fall back to the
    # unique processing name from the configuration (required in that case).
    if "name" in clustered_quantum_graph.graph and clustered_quantum_graph.graph["name"] is not None:
        name = clustered_quantum_graph.graph["name"]
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    # Save per-job QuantumGraphs now if configured to do so at this stage.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        for job_name in generic_workflow.nodes():
            job = generic_workflow.get_job(job_name)
            if job.qgraph_node_ids is not None:
                save_qg_subgraph(clustered_quantum_graph.graph["qgraph"],
                                 create_job_quantum_graph_filename(job, prefix),
                                 job.qgraph_node_ids)

    return generic_workflow, generic_workflow_config
def group_clusters_into_jobs(clustered_quanta_graph, name):
    """Group clusters of quanta into compute jobs.

    Parameters
    ----------
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        Graph where each node is a QuantumGraph of quanta that should be run
        inside single python execution.
    name : `str`
        Name of GenericWorkflow (typically unique by conventions).

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Skeleton of the generic workflow (job placeholders and dependencies).
    """
    workflow = GenericWorkflow(name)

    # One placeholder job per cluster of quanta.
    for cluster_name, attrs in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", cluster_name,
                   len(attrs["qgraph_node_ids"]), attrs["label"], attrs["qgraph_node_ids"][:4])
        new_job = GenericWorkflowJob(cluster_name)
        new_job.qgraph_node_ids = attrs["qgraph_node_ids"]
        if "tags" in attrs:
            new_job.tags = attrs["tags"]
        if "label" in attrs:
            new_job.label = attrs["label"]
        workflow.add_job(new_job)

    # Mirror the cluster graph's edges as job dependencies.
    for parent in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(parent):
            workflow.add_job_relationships(parent, child)

    return workflow
def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    # NOTE(review): the lookup key is HTCondor-specific ("condor") even
    # though this module is otherwise WMS-agnostic -- confirm whether other
    # WMS plugins need their own profile section.
    search_key = f".site.{job.compute_site}.profile.condor"

    if search_key in config:
        # Entries prefixed with "+" become job attributes; the rest are
        # plain profile settings.  (The loop variable previously shadowed
        # the lookup key itself; renamed to avoid the fragile shadowing.)
        for name, val in config[search_key].items():
            if name.startswith("+"):
                job.attrs[name[1:]] = val
            else:
                job.profile[name] = val
def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a standalone graph holding the initialization job(s) and files.
    init_workflow = create_init_workflow(config)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())

    # Jobs in the main workflow that have no parents must wait on init.
    sources = [node for node in generic_workflow if generic_workflow.in_degree(node) == 0]
    _LOG.debug("workflow sources = %s", sources)

    # Jobs in the init workflow with no children feed into the main workflow.
    sinks = [node for node in init_workflow if init_workflow.out_degree(node) == 0]
    _LOG.debug("init sinks = %s", sinks)

    # Merge the init graph into the main one, then wire every init sink to
    # every main-workflow source.
    generic_workflow.add_nodes_from(init_workflow.nodes(data=True))
    generic_workflow.add_edges_from(init_workflow.edges())
    # NOTE(review): reaches into GenericWorkflow's private ``_files`` --
    # confirm no public merge API exists before relying on this.
    generic_workflow._files.update(init_workflow._files)
    for src in sources:
        for sink in sinks:
            generic_workflow.add_edge(sink, src)
def create_init_workflow(config):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.

    Returns
    -------
    init_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"}, "required": False, "default": False}
    _, use_shared = config.search("bpsUseShared", opt=search_opt)

    # The init job reads the full run QuantumGraph; transfer the file unless
    # it lives on a filesystem shared with the compute nodes.
    gwfile = GenericWorkflowFile(os.path.basename(config["run_qgraph_file"]),
                                 wms_transfer=not use_shared,
                                 src_uri=config["run_qgraph_file"])

    init_workflow = GenericWorkflow("init")

    # Single job that runs the pipeline's --init-only step.
    job = GenericWorkflowJob("pipetaskInit")
    job.label = "pipetaskInit"
    job.compute_site = config["computeSite"]
    search_opt["default"] = 0
    # Resource requests all follow the same lookup pattern; drive them
    # from a table instead of four copy-pasted statements.
    for attr_name, config_key in (("request_cpus", "requestCpus"),
                                  ("request_memory", "requestMemory"),
                                  ("request_disk", "requestDisk"),
                                  ("request_walltime", "requestWalltime")):
        setattr(job, attr_name, int(config.search(config_key, opt=search_opt)[1]))
    update_job(config, job)
    create_command(config, job, gwfile)
    init_workflow.add_job(job)

    # All outputs (config, software versions, etc) go to Butler.
    # Currently no need to add them to job.
    init_workflow.add_job_inputs(job.name, gwfile)

    return init_workflow
def create_command(config, gwjob, gwfile):
    """Update command line string in job.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    gwjob : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
        Job for which to create command line.
    gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
        File that will contain the QuantumGraph.
    """
    # When the WMS transfers the file, the job sees only the basename in its
    # working directory; otherwise use the full shared-filesystem path.
    if gwfile.wms_transfer:
        qgraph_path = os.path.basename(gwfile.src_uri)
    else:
        qgraph_path = gwfile.src_uri

    curvals = {"curr_pipetask": gwjob.label, "qgraphFile": qgraph_path}

    # Restrict execution to this job's quanta when node ids are known.
    if gwjob.qgraph_node_ids:
        curvals["qgraphId"] = gwjob.qgraph_node_ids[0].buildId
        curvals["qgraphNodeId"] = ",".join(f"{nid.number}" for nid in gwjob.qgraph_node_ids)

    search_opt = {"curvals": curvals, "required": False}
    _, gwjob.cmdline = config.search("runQuantumCommand", opt=search_opt)
def create_job_values_universal(config, qgraph, generic_workflow, prefix):
    """Create job values. Must be same value for every PipelineTask in
    QuantumGraph.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Full run QuantumGraph.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    prefix : `str`
        Root path for any output files.

    Raises
    ------
    RuntimeError
        If quanta within a single cluster disagree on the run command,
        compute site, or shared-filesystem setting.
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        per_job_qgraph_file = False
        run_qgraph_gwfile = GenericWorkflowFile(os.path.basename(config["run_qgraph_file"]),
                                                src_uri=config["run_qgraph_file"])

    # Verify workflow config values are same for all nodes in QuantumGraph
    # for running the Quantum and compute_site.
    for job_name, data in generic_workflow.nodes(data=True):
        generic_workflow_job = data["job"]
        job_command = None
        job_compute_site = None
        job_use_shared = None  # Cannot set default or can get conflict on first Quantum.
        for node_id in generic_workflow_job.qgraph_node_ids:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            task_def = qnode.taskDef
            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            search_opt = {"curvals": {"curr_pipetask": task_def.label}, "required": False}

            _, command = config.search("runQuantumCommand", opt=search_opt)
            if job_command is None:
                job_command = command
            elif job_command != command:
                _LOG.error("Inconsistent command to run QuantumGraph\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster command: %s\n"
                           "Inconsistent command: %s",
                           job_name, qnode.nodeId.number, job_command, command)
                raise RuntimeError("Inconsistent run QuantumGraph command")

            _, compute_site = config.search("computeSite", opt=search_opt)
            if job_compute_site is None:
                job_compute_site = compute_site
            elif job_compute_site != compute_site:
                _LOG.error("Inconsistent compute_site\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster compute_site: %s\n"
                           "Inconsistent compute_site: %s",
                           job_name, qnode.nodeId.number, job_compute_site, compute_site)
                # Fixed copy-paste bug: this branch previously raised the
                # "run QuantumGraph command" message for a compute_site
                # mismatch.
                raise RuntimeError("Inconsistent compute_site value within cluster.")

            _, use_shared = config.search("bpsUseShared", opt=search_opt)
            if job_use_shared is None:
                job_use_shared = use_shared
            elif job_use_shared != use_shared:
                _LOG.error("Inconsistent bpsUseShared\n"
                           "Cluster %s Quantum Number %d\n"
                           "Current cluster bpsUseShared: %s\n"
                           "Inconsistent bpsUseShared: %s",
                           job_name, qnode.nodeId.number, job_use_shared, use_shared)
                raise RuntimeError("Inconsistent bpsUseShared value within cluster.")

        if per_job_qgraph_file:
            data["qgraph_file"] = create_job_quantum_graph_filename(generic_workflow_job, prefix)
            gwfile = GenericWorkflowFile(os.path.basename(data["qgraph_file"]),
                                         src_uri=data["qgraph_file"])
        else:
            data["qgraph_file"] = run_qgraph_gwfile.src_uri
            gwfile = run_qgraph_gwfile

        # Tell WMS whether to transfer QuantumGraph file.
        gwfile.wms_transfer = not job_use_shared

        generic_workflow.add_job_inputs(job_name, gwfile)

        # create_command sets the job's final cmdline from the config; the
        # former pre-assignment of job_command here was a dead store.
        create_command(config, generic_workflow_job, gwfile)
        if job_compute_site is not None:
            generic_workflow_job.compute_site = job_compute_site
        update_job(config, generic_workflow_job)
def create_job_values_aggregate(config, qgraph, generic_workflow):
    """Create job values that are aggregate of values from PipelineTasks
    in QuantumGraph.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Full run QuantumGraph.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow in which job values will be added.
    """
    for _, node_data in generic_workflow.nodes(data=True):
        job = node_data["job"]

        # Per-label quanta counters, initialized in pipeline order.
        label_counts = {task.label: 0 for task in qgraph.iterTaskGraph()}

        # cpus/memory aggregate by max; disk/walltime accumulate by sum.
        job.request_cpus = 0
        job.request_memory = 0
        job.request_disk = 0
        job.request_walltime = 0

        for node_id in job.qgraph_node_ids:  # Assumes ordering.
            quantum_node = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[quantum_node.taskDef.label] += 1

            search_opt = {"curvals": {"curr_pipetask": quantum_node.taskDef.label},
                          "required": False, "default": 0}
            _, cpus = config.search("requestCpus", opt=search_opt)
            job.request_cpus = max(job.request_cpus, int(cpus))
            _, memory = config.search("requestMemory", opt=search_opt)
            job.request_memory = max(job.request_memory, int(memory))
            _, disk = config.search("requestDisk", opt=search_opt)
            job.request_disk += int(disk)
            _, walltime = config.search("requestWalltime", opt=search_opt)
            job.request_walltime += int(walltime)

        job.quanta_summary = ";".join(f"{label}:{count}" for label, count in label_counts.items() if count)
def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `~lsst.ctrl.bps.clustered_quantum_graph.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow with job-level values filled in and, if configured,
        initialization nodes and workflow-level attributes added.
    """
    generic_workflow = group_clusters_into_jobs(clustered_quanta_graph, name)
    create_job_values_universal(config, clustered_quanta_graph.graph["qgraph"], generic_workflow, prefix)
    create_job_values_aggregate(config, clustered_quanta_graph.graph["qgraph"], generic_workflow)

    # NOTE(review): the fallback "{default: False}" is a truthy *string*;
    # unless BpsConfig.get parses it specially, a missing "runInit" key would
    # enable init nodes -- confirm BpsConfig.get semantics.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, generic_workflow)
    add_workflow_attributes(config, generic_workflow)
    return generic_workflow
def add_workflow_attributes(config, generic_workflow):
    """Add workflow-level attributes to given GenericWorkflow.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
        Generic workflow to which attributes should be added.
    """
    # Aggregate per-label quanta counts across all jobs in the workflow.
    quanta_counts = {}
    for job_name in generic_workflow:
        job = generic_workflow.get_job(job_name)
        if not job.quanta_summary:
            continue
        for part in job.quanta_summary.split(";"):
            label, count = part.split(":")
            quanta_counts[label] = quanta_counts.get(label, 0) + int(count)

    summary = ";".join(f"{label}:{count}" for label, count in quanta_counts.items())

    generic_workflow.run_attrs.update({"bps_run_summary": summary,
                                       "bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": "TODO"})
def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `~lsst.ctrl.bps.bps_config.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Copy the incoming configuration, then record workflow identity and
    # output location on the copy.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config