Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 5%
126 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-01 02:12 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-01 02:12 -0700
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Functions that convert QuantumGraph into ClusteredQuantumGraph.
23"""
24import logging
25import re
26from collections import defaultdict
28from lsst.pipe.base import NodeId
29from networkx import DiGraph, is_directed_acyclic_graph
31from . import ClusteredQuantumGraph, QuantaCluster
33_LOG = logging.getLogger(__name__)
def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph holding exactly one quantum per cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quantum nodes are each placed in a cluster of
        their own.
    name : `str`
        Name to give to the ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph containing one cluster per quantum node of
        the given QuantumGraph, with a dependency between two clusters for
        every parent/child pair reported by the QuantumGraph.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster name for every quantum node id, filled in while the clusters
    # are created and consulted afterwards to wire up the dependencies.
    cluster_name_by_node = {}

    # The name template only depends on the task label, so the config is
    # searched once per label instead of once per quantum.
    template_by_label = {}

    # First pass: one cluster per quantum node.
    for node in qgraph:
        label = node.taskDef.label
        if label not in template_by_label:
            search_opt = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
            found, template_data_id = config.search("templateDataId", opt=search_opt)
            template_by_label[label] = (
                "{node_number}_{label}_" + template_data_id if found else "{node_number}"
            )

        new_cluster = QuantaCluster.from_quantum_node(node, template_by_label[label])
        cluster_name_by_node[node.nodeId] = new_cluster.name
        clustered.add_cluster(new_cluster)

    # Second pass: mirror each parent -> child edge of the QuantumGraph as a
    # dependency between the corresponding single-quantum clusters.
    for parent in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            clustered.add_dependency(
                cluster_name_by_node[parent.nodeId],
                cluster_name_by_node[child.nodeId],
            )

    return clustered
def _check_clusters_tasks(cluster_config, taskGraph):
    """Validate the pipetask lists of the cluster definitions.

    Every task label may belong to at most one cluster definition, and
    collapsing the task graph onto the clusters must still yield a
    directed acyclic graph.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    taskGraph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    task_labels : `set` [`str`]
        Set of task labels from the cluster definitions.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # Cluster that each task label ends up in, the labels claimed by the
    # cluster definitions, and the graph of clusters checked for cycles.
    cluster_of_task = {}
    task_labels = set()
    cluster_graph = DiGraph()

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks = cluster_config[cluster_label]["pipetasks"]
        for raw_label in pipetasks.split(","):
            task_label = raw_label.strip()
            if task_label in task_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            task_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label
            cluster_graph.add_node(cluster_label)

    # Any task not claimed by a cluster definition becomes a cluster of its
    # own, named after the task label.
    for task in taskGraph:
        label = task.label
        if label in task_labels:
            continue
        cluster_of_task[label] = label
        cluster_graph.add_node(label)

    # Only edges crossing a cluster boundary become cluster dependencies.
    for edge in taskGraph.edges:
        src_cluster = cluster_of_task[edge[0].label]
        dst_cluster = cluster_of_task[edge[1].label]
        if src_cluster != dst_cluster:
            cluster_graph.add_edge(src_cluster, dst_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if is_directed_acyclic_graph(cluster_graph):
        return task_labels
    raise RuntimeError("Cluster pipetasks do not create a DAG")
def dimension_clustering(config, qgraph, name):
    """Follow config instructions to make clusters based upon dimensions.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.

    Raises
    ------
    RuntimeError
        Raised if a quantum lacks a dimension required by its cluster
        definition, even after applying ``equalDimensions``.  Errors
        raised while validating the cluster definitions also propagate.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Save mapping in order to create dependencies later.
    quantum_to_cluster = {}

    cluster_config = config["cluster"]
    task_labels = _check_clusters_tasks(cluster_config, qgraph.taskGraph)
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        cluster_section = cluster_config[cluster_label]

        cluster_dims = []
        if "dimensions" in cluster_section:
            cluster_dims = [d.strip() for d in cluster_section["dimensions"].split(",")]
        _LOG.debug("cluster_dims = %s", cluster_dims)

        found, template = cluster_section.search("clusterTemplate", opt={"replaceVars": False})
        if not found:
            if cluster_dims:
                template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
            else:
                template = cluster_label
        _LOG.debug("template = %s", template)

        # Parse the equalDimensions pairs once per cluster rather than once
        # per quantum; the value does not change while looping over quanta.
        equal_pairs = []
        equal_dims = cluster_section.get("equalDimensions", None)
        if equal_dims:
            for pair in equal_dims.split(","):
                dim1, dim2 = (part.strip() for part in pair.split(":"))
                equal_pairs.append((dim1, dim2))

        cluster_tasks = [pt.strip() for pt in cluster_section["pipetasks"].split(",")]
        for task_label in cluster_tasks:
            task_labels.add(task_label)

            # The per-task node lookup is described upstream as a mapping
            # from taskDef to quanta, so it is quick enough to call
            # repeatedly.
            task_def = qgraph.findTaskDefByLabel(task_label)
            if task_def is None:
                continue
            quantum_nodes = qgraph.getNodesForTask(task_def)

            # Determine cluster for each node.
            for qnode in quantum_nodes:
                # Gather info for cluster name template into a dictionary.
                info = {}

                missing_info = set()
                data_id_info = qnode.quantum.dataId.byName()
                for dim_name in cluster_dims:
                    _LOG.debug("dim_name = %s", dim_name)
                    if dim_name in data_id_info:
                        info[dim_name] = data_id_info[dim_name]
                    else:
                        missing_info.add(dim_name)

                # Substitute an equal dimension only for a cluster dimension
                # that really is missing.  Removing a dimension that was
                # never missing (quantum has both dimensions, or the pair
                # is listed twice) used to raise a bare KeyError.
                for dim1, dim2 in equal_pairs:
                    if dim1 in missing_info and dim2 in data_id_info:
                        info[dim1] = data_id_info[dim2]
                        missing_info.discard(dim1)
                    elif dim2 in missing_info and dim1 in data_id_info:
                        info[dim2] = data_id_info[dim1]
                        missing_info.discard(dim2)

                info["label"] = cluster_label
                _LOG.debug("info for template = %s", info)

                if missing_info:
                    # Sorted so the message does not depend on set order.
                    missing_dims = ",".join(sorted(missing_info))
                    raise RuntimeError(
                        f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions "
                        f"{missing_dims} required for cluster {cluster_label}"
                    )

                # Use dictionary plus template format string to create name.
                # To avoid key errors from generic patterns, use defaultdict
                # so unknown placeholders expand to an empty string.
                cluster_name = template.format_map(defaultdict(str, info))
                cluster_name = re.sub("_+", "_", cluster_name)

                # Some dimensions contain slash which must be replaced.
                cluster_name = cluster_name.replace("/", "_")
                _LOG.debug("cluster_name = %s", cluster_name)

                # Save mapping for use when creating dependencies.
                quantum_to_cluster[qnode.nodeId] = cluster_name

                # Add cluster to the ClusteredQuantumGraph.
                # Saving NodeId instead of number because QuantumGraph API
                # requires it for creating per-job QuantumGraphs.
                if cluster_name in cqgraph:
                    cluster = cqgraph.get_cluster(cluster_name)
                else:
                    cluster = QuantaCluster(cluster_name, cluster_label, info)
                    cqgraph.add_cluster(cluster)
                cluster.add_quantum(qnode.nodeId, task_label)

    # Assume any task not handled above is supposed to be 1 cluster = 1 quantum.
    for task_def in qgraph.iterTaskGraph():
        if task_def.label not in task_labels:
            _LOG.info("Creating 1-quantum clusters for task %s", task_def.label)
            found, template_data_id = config.search(
                "templateDataId", opt={"curvals": {"curr_pipetask": task_def.label}, "replaceVars": False}
            )
            if found:
                template = "{node_number}_{label}_" + template_data_id
            else:
                template = "{node_number}"

            for qnode in qgraph.getNodesForTask(task_def):
                cluster = QuantaCluster.from_quantum_node(qnode, template)
                cqgraph.add_cluster(cluster)
                quantum_to_cluster[qnode.nodeId] = cluster.name

    # Add cluster dependencies.
    for parent in qgraph:
        # Get child nodes.
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            # Keep the try limited to the two lookups so that a KeyError
            # raised inside add_dependency is not mistaken for a missing
            # node id.
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[child.nodeId]
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method.
                # NOTE(review): assumes the missing key can be wrapped in a
                # NodeId for the lookup below -- confirm against the
                # QuantumGraph version in use.
                nid = NodeId(e.args[0], qgraph.graphID)
                qnode = qgraph.getQuantumNodeByNodeId(nid)

                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    qnode.taskDef.label,
                    qnode.quantum.dataId.byName(),
                )
                raise

            # Quanta in the same cluster need no dependency edge.
            if parent_cluster != child_cluster:
                cqgraph.add_dependency(parent_cluster, child_cluster)

    return cqgraph