Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 5%
126 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-01 23:59 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-01 23:59 -0700
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Functions that convert QuantumGraph into ClusteredQuantumGraph.
23"""
24import logging
25import re
26from collections import defaultdict
28from lsst.pipe.base import NodeId
29from networkx import DiGraph, is_directed_acyclic_graph
31from . import ClusteredQuantumGraph, QuantaCluster
33_LOG = logging.getLogger(__name__)
def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph where each cluster holds one quantum.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quanta are each placed in their own cluster.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with single quantum per cluster created from
        given QuantumGraph.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
        butler_uri=config["butlerConfig"],
    )

    # Name template per task label; looked up in the config only once per
    # label because every quantum of a task shares the same template.
    template_by_label = {}

    # Cluster name per quantum node id, needed to wire up dependencies once
    # all of the clusters exist.
    name_by_node_id = {}

    # First pass: one cluster per quantum.
    for qnode in qgraph:
        label = qnode.taskDef.label
        template = template_by_label.get(label)
        if template is None:
            found, template_data_id = config.search(
                "templateDataId",
                opt={"curvals": {"curr_pipetask": label}, "replaceVars": False},
            )
            template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"
            template_by_label[label] = template

        cluster = QuantaCluster.from_quantum_node(qnode, template)
        name_by_node_id[qnode.nodeId] = cluster.name
        cqgraph.add_cluster(cluster)

    # Second pass: mirror every parent -> child quantum edge as a dependency
    # between the corresponding clusters.
    for parent in qgraph:
        parent_name = name_by_node_id[parent.nodeId]
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            cqgraph.add_dependency(parent_name, name_by_node_id[child.nodeId])

    return cqgraph
def _check_clusters_tasks(cluster_config, taskGraph):
    """Validate the pipetask lists given in the cluster definitions.

    A coarse graph is built with one node per cluster definition plus one
    node per task that is not listed in any cluster definition.  Edges of
    the task graph that cross between two different nodes are copied into
    it, and the result is required to be acyclic.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    taskGraph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    task_labels : `set` [`str`]
        Set of task labels from the cluster definitions.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    owner_of_task = {}
    seen_labels = set()
    cluster_graph = DiGraph()

    # One node per configured cluster; remember which cluster owns each task.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks = cluster_config[cluster_label]["pipetasks"]
        for raw_label in pipetasks.split(","):
            task_label = raw_label.strip()
            if task_label in seen_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            seen_labels.add(task_label)
            owner_of_task[task_label] = cluster_label
            cluster_graph.add_node(cluster_label)

    # Any task not named in a cluster definition becomes its own node.
    for task in taskGraph:
        if task.label in seen_labels:
            continue
        owner_of_task[task.label] = task.label
        cluster_graph.add_node(task.label)

    # Copy over only the task edges that cross between two different nodes.
    for edge in taskGraph.edges:
        source = owner_of_task[edge[0].label]
        target = owner_of_task[edge[1].label]
        if source != target:
            cluster_graph.add_edge(source, target)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return seen_labels
def dimension_clustering(config, qgraph, name):
    """Follow config instructions to make clusters based upon dimensions.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.

    Raises
    ------
    RuntimeError
        Raised if a quantum has no value for a dimension required by its
        cluster definition, even after applying ``equalDimensions``.  Also
        propagated from the check of the cluster definitions.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
        butler_uri=config["butlerConfig"],
    )

    # Save mapping in order to create dependencies later.
    quantum_to_cluster = {}

    cluster_config = config["cluster"]
    task_labels = _check_clusters_tasks(cluster_config, qgraph.taskGraph)
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        cluster_section = cluster_config[cluster_label]

        cluster_dims = []
        if "dimensions" in cluster_section:
            cluster_dims = [d.strip() for d in cluster_section["dimensions"].split(",")]
        _LOG.debug("cluster_dims = %s", cluster_dims)

        found, template = cluster_section.search("clusterTemplate", opt={"replaceVars": False})
        if not found:
            if cluster_dims:
                template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
            else:
                template = cluster_label
        _LOG.debug("template = %s", template)

        # The equalDimensions setting is the same for every task and quantum
        # of this cluster, so parse it once here instead of per quantum.
        equal_pairs = []
        equal_dims = cluster_section.get("equalDimensions", None)
        if equal_dims:
            for pair in equal_dims.split(","):
                dim1, dim2 = (part.strip() for part in pair.split(":"))
                equal_pairs.append((dim1, dim2))

        cluster_tasks = [pt.strip() for pt in cluster_section["pipetasks"].split(",")]
        for task_label in cluster_tasks:
            task_labels.add(task_label)

            # A label listed in the cluster definition may have no task in
            # this QuantumGraph, in which case there is nothing to cluster.
            task_def = qgraph.findTaskDefByLabel(task_label)
            if task_def is None:
                continue

            # Determine cluster for each node.
            for qnode in qgraph.getNodesForTask(task_def):
                data_id_info = qnode.quantum.dataId.byName()
                info, missing_info = _gather_cluster_info(cluster_dims, equal_pairs, data_id_info)
                info["label"] = cluster_label
                _LOG.debug("info for template = %s", info)

                if missing_info:
                    # Sorted so the message is the same from run to run.
                    missing = ",".join(sorted(missing_info))
                    raise RuntimeError(
                        f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions {missing} "
                        f"required for cluster {cluster_label}"
                    )

                # Use dictionary plus template format string to create name.
                # To avoid key errors from generic patterns, use defaultdict
                # so unknown placeholders become empty strings.
                cluster_name = template.format_map(defaultdict(str, info))
                cluster_name = re.sub(r"_+", "_", cluster_name)

                # Some dimensions contain slash which must be replaced.
                cluster_name = cluster_name.replace("/", "_")
                _LOG.debug("cluster_name = %s", cluster_name)

                # Save mapping for use when creating dependencies.
                quantum_to_cluster[qnode.nodeId] = cluster_name

                # Add cluster to the ClusteredQuantumGraph.
                # Saving NodeId instead of number because QuantumGraph API
                # requires it for creating per-job QuantumGraphs.
                if cluster_name in cqgraph:
                    cluster = cqgraph.get_cluster(cluster_name)
                else:
                    cluster = QuantaCluster(cluster_name, cluster_label, info)
                    cqgraph.add_cluster(cluster)
                cluster.add_quantum(qnode.nodeId, task_label)

    # Assume any task not handled above is supposed to be 1 cluster = 1 quantum.
    for task_def in qgraph.iterTaskGraph():
        if task_def.label not in task_labels:
            _LOG.info("Creating 1-quantum clusters for task %s", task_def.label)
            found, template_data_id = config.search(
                "templateDataId", opt={"curvals": {"curr_pipetask": task_def.label}, "replaceVars": False}
            )
            if found:
                template = "{node_number}_{label}_" + template_data_id
            else:
                template = "{node_number}"

            for qnode in qgraph.getNodesForTask(task_def):
                cluster = QuantaCluster.from_quantum_node(qnode, template)
                cqgraph.add_cluster(cluster)
                quantum_to_cluster[qnode.nodeId] = cluster.name

    # Add cluster dependencies.
    for parent in qgraph:
        # Get child nodes.
        children = qgraph.determineOutputsOfQuantumNode(parent)
        for child in children:
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[child.nodeId]
                # Quanta inside the same cluster need no dependency edge.
                if parent_cluster != child_cluster:
                    cqgraph.add_dependency(parent_cluster, child_cluster)
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method.
                # NOTE(review): assumes the missing key can be turned back
                # into a NodeId for lookup in the QuantumGraph - confirm
                # against the current QuantumGraph API.
                nid = NodeId(e.args[0], qgraph.graphID)
                qnode = qgraph.getQuantumNodeByNodeId(nid)

                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    qnode.taskDef.label,
                    qnode.quantum.dataId.byName(),
                )
                raise

    return cqgraph


def _gather_cluster_info(cluster_dims, equal_pairs, data_id_info):
    """Collect the dimension values used to name one quantum's cluster.

    Parameters
    ----------
    cluster_dims : `list` [`str`]
        Names of the dimensions required by the cluster definition.
    equal_pairs : `list` [`tuple` [`str`, `str`]]
        Pairs of dimension names whose values may stand in for each other.
    data_id_info : `dict` [`str`, `Any`]
        Dimension name to value mapping for the quantum.

    Returns
    -------
    info : `dict` [`str`, `Any`]
        Value for each cluster dimension that could be determined.
    missing_info : `set` [`str`]
        Cluster dimensions for which no value could be determined.
    """
    info = {}
    missing_info = set()
    for dim_name in cluster_dims:
        _LOG.debug("dim_name = %s", dim_name)
        if dim_name in data_id_info:
            info[dim_name] = data_id_info[dim_name]
        else:
            missing_info.add(dim_name)

    # Only fill in dimensions that are actually missing.  This keeps a value
    # the quantum already has and avoids removing a name that is not in the
    # missing set.
    for dim1, dim2 in equal_pairs:
        if dim1 in missing_info and dim2 in data_id_info:
            info[dim1] = data_id_info[dim2]
            missing_info.discard(dim1)
        elif dim2 in missing_info and dim1 in data_id_info:
            info[dim2] = data_id_info[dim1]
            missing_info.discard(dim2)

    return info, missing_info