Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%
148 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-14 02:22 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-14 02:22 -0700
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Functions that convert QuantumGraph into ClusteredQuantumGraph.
23"""
24import logging
25import re
26from collections import defaultdict
28from networkx import DiGraph, is_directed_acyclic_graph, topological_sort
30from . import ClusteredQuantumGraph, QuantaCluster
32_LOG = logging.getLogger(__name__)
def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph in which every cluster holds one quantum.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with single quantum per cluster created from
        given QuantumGraph.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster name for each quantum node id; reused when wiring up the
    # dependencies so names are only computed once.
    name_by_node_id = {}

    # Name template per task label, so the config is searched only once
    # per label.
    template_by_label = {}

    # First pass: one cluster per quantum node.
    for qnode in qgraph:
        task_label = qnode.taskDef.label
        template = template_by_label.get(task_label)
        if template is None:
            found, template_data_id = config.search(
                "templateDataId",
                opt={"curvals": {"curr_pipetask": task_label}, "replaceVars": False},
            )
            template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"
            template_by_label[task_label] = template

        cluster = QuantaCluster.from_quantum_node(qnode, template)
        name_by_node_id[qnode.nodeId] = cluster.name
        cqgraph.add_cluster(cluster)

    # Second pass: mirror every parent -> child quantum edge as a
    # dependency between the corresponding clusters.
    for parent in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            cqgraph.add_dependency(name_by_node_id[parent.nodeId], name_by_node_id[child.nodeId])

    return cqgraph
def _check_clusters_tasks(cluster_config, task_graph):
    """Validate the cluster definitions expressed as lists of pipetasks.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels: `list` [`str`]
        Dependency ordered list of cluster labels (includes
        single quantum clusters).
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # The task graph is keyed by TaskDef objects; build an equivalent
    # graph keyed by label so cluster definitions (which use labels)
    # can be checked against it.
    label_graph = DiGraph()
    for task_def in task_graph:
        child_label = task_def.label
        label_graph.add_node(child_label)
        for parent_def in task_graph.predecessors(task_def):
            label_graph.add_edge(parent_def.label, child_label)

    # State for the "clustered" task graph that is checked for cycles.
    cluster_of_task = {}
    claimed_labels = set()
    cluster_graph = DiGraph()
    ordered_tasks = {}  # cluster label to ordered list of task labels

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks = cluster_config[cluster_label]["pipetasks"]
        labels_in_graph = []
        for raw_label in pipetasks.split(","):
            task_label = raw_label.strip()
            if task_label in claimed_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Only cluster defs that affect the QuantumGraph are checked.
            if not label_graph.has_node(task_label):
                continue
            labels_in_graph.append(task_label)
            claimed_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label

        if not labels_in_graph:
            continue

        # Record the cluster's tasks in dependency order.  Only clusters
        # with at least one task in the graph become nodes, so every
        # cluster label in the graph has an entry in ordered_tasks.
        ordered_tasks[cluster_label] = list(topological_sort(label_graph.subgraph(labels_in_graph)))
        cluster_graph.add_node(cluster_label)

    # Every task not claimed by a configured cluster becomes its own
    # single-task cluster named after the task.
    for label in label_graph:
        if label in claimed_labels:
            continue
        cluster_of_task[label] = label
        cluster_graph.add_node(label)
        ordered_tasks[label] = [label]

    # Task-level edges that cross cluster boundaries become
    # cluster-level edges.
    for parent_def, child_def in task_graph.edges:
        parent_cluster = cluster_of_task[parent_def.label]
        child_cluster = cluster_of_task[child_def.label]
        if parent_cluster != child_cluster:
            cluster_graph.add_edge(parent_cluster, child_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks
def dimension_clustering(config, qgraph, name):
    """Cluster a QuantumGraph by dimensions as instructed by the config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Quantum node id -> cluster name; filled in by the helpers below and
    # consulted by them when creating dependencies.
    quantum_to_cluster = {}

    cluster_section = config["cluster"]
    cluster_labels, ordered_tasks = _check_clusters_tasks(cluster_section, qgraph.taskGraph)

    # Labels arrive in dependency order.
    for cluster_label in cluster_labels:
        _LOG.debug("cluster = %s", cluster_label)

        # A label without its own config entry is a task that was not
        # assigned to any configured cluster: one cluster per quantum.
        if cluster_label not in cluster_section:
            add_clusters_per_quantum(config, cluster_label, qgraph, cqgraph, quantum_to_cluster)
            continue

        add_dim_clusters(
            cluster_section[cluster_label],
            cluster_label,
            qgraph,
            ordered_tasks,
            cqgraph,
            quantum_to_cluster,
        )

    return cqgraph
def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Add one single-quantum cluster per quantum of the given task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added.  (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    # Cluster name template: use the configured data id template for this
    # task when one exists, otherwise just the node number.
    found, template_data_id = config.search(
        "templateDataId", opt={"curvals": {"curr_pipetask": label}, "replaceVars": False}
    )
    template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"

    # Every quantum node of the task becomes its own cluster.
    for qnode in qgraph.getNodesForTask(qgraph.findTaskDefByLabel(label)):
        cluster = QuantaCluster.from_quantum_node(qnode, template)
        cqgraph.add_cluster(cluster)
        # Register the node before adding dependencies, which look up
        # the cluster of both ends of each edge.
        quantum_to_cluster[qnode.nodeId] = cluster.name
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)
def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    Each quantum of the cluster's tasks is assigned to a cluster whose
    name is produced by filling the cluster name template with the values
    of the configured dimensions taken from the quantum's data id.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new
        clusters are added.  (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum's data id lacks a dimension required by the
        cluster definition and no ``equalDimensions`` pair supplies it.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # The equalDimensions setting does not depend on the task or the
    # quantum, so read and parse it once instead of once per quantum.
    equal_pairs = []
    equal_dims = cluster_config.get("equalDimensions", None)
    if equal_dims:
        for pair in equal_dims.split(","):
            dim1, dim2 = (dim.strip() for dim in pair.split(":"))
            equal_pairs.append((dim1, dim2))

    # Clusters touched by this call, keyed by name so each one has its
    # dependencies added exactly once no matter how many quanta it holds.
    new_clusters = {}
    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue
        quantum_nodes = qgraph.getNodesForTask(task_def)

        # Determine cluster for each node
        for qnode in quantum_nodes:
            # Gather info for cluster name template into a dictionary.
            info = {}

            missing_info = set()
            data_id_info = qnode.quantum.dataId.byName()
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            # Fill still-missing cluster dimensions from their declared
            # equivalents.  Testing membership in missing_info (rather
            # than in cluster_dims) means a dimension the data id already
            # provides, or one that an earlier pair filled, is left alone
            # instead of triggering a KeyError on removal.
            for dim1, dim2 in equal_pairs:
                if dim1 in missing_info and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    missing_info.discard(dim1)
                elif dim2 in missing_info and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.discard(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                # Sorted so the message is deterministic.
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions: "
                    f"{','.join(sorted(missing_info))}; required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # To avoid key errors from generic patterns, use defaultdict
            # so unknown template fields become empty strings.
            cluster_name = template.format_map(defaultdict(str, info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = cluster_name.replace("/", "_")
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            new_clusters.setdefault(cluster_name, cluster)

    # Dependencies are added only after every quantum has been assigned,
    # so all quanta of this cluster label are present in quantum_to_cluster.
    for cluster in new_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)
def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Add the incoming dependencies of one cluster to its graph.

    For every quantum in the cluster, each parent quantum that lives in a
    different cluster yields a dependency from the parent's cluster to
    this quantum's cluster.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies
        are added.  (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        Only read here.

    Raises
    ------
    KeyError :
        Raised if any of the cluster's quantum node ids are missing
        from quantum_to_cluster or if their parent quantum node ids
        are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for node_id in cluster.qgraph_node_ids:
        child_node = qgraph.getQuantumNodeByNodeId(node_id)
        for parent in qgraph.determineInputsToQuantumNode(child_node):
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[node_id]
                # Edges inside a single cluster are not dependencies.
                if parent_cluster != child_cluster:
                    cqgraph.add_dependency(parent_cluster, child_cluster)
            except KeyError as exc:  # pragma: no cover
                # For debugging a problem internal to method: report
                # which quantum had no cluster, then re-raise.
                missing_node = qgraph.getQuantumNodeByNodeId(exc.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    missing_node.taskDef.label,
                    missing_node.quantum.dataId.byName(),
                )
                raise