Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%
148 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-22 09:44 +0000
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Functions that convert QuantumGraph into ClusteredQuantumGraph.
23"""
24import logging
25import re
26from collections import defaultdict
28from networkx import DiGraph, is_directed_acyclic_graph, topological_sort
30from . import ClusteredQuantumGraph, QuantaCluster
32_LOG = logging.getLogger(__name__)
def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph in which every quantum is its own cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.  Supplies the run QuantumGraph filename
        (``.bps_defined.runQgraphFile``) and, optionally, a per-task
        ``templateDataId`` used when naming clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quantum nodes are turned into clusters.
    name : `str`
        Name to give to the ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph holding one cluster per quantum node of the
        given QuantumGraph, with dependencies mirroring the quantum
        dependencies.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Name template per task label; searched in the config only once per
    # label.
    templates_by_label = {}

    # Quantum node id -> cluster name, needed for the dependency pass below.
    cluster_name_by_node = {}

    # First pass: one cluster per quantum node.
    for qnode in qgraph:
        task_label = qnode.taskDef.label
        name_template = templates_by_label.get(task_label)
        if name_template is None:
            found, template_data_id = config.search(
                "templateDataId",
                opt={"curvals": {"curr_pipetask": task_label}, "replaceVars": False},
            )
            name_template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"
            templates_by_label[task_label] = name_template

        cluster = QuantaCluster.from_quantum_node(qnode, name_template)
        cluster_name_by_node[qnode.nodeId] = cluster.name
        cqgraph.add_cluster(cluster)

    # Second pass: connect each cluster to the clusters of its child quanta.
    for qnode in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(qnode):
            cqgraph.add_dependency(cluster_name_by_node[qnode.nodeId], cluster_name_by_node[child.nodeId])

    return cqgraph
def _check_clusters_tasks(cluster_config, task_graph):
    """Validate cluster definitions expressed as lists of pipetask labels.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.  Each entry must
        provide a comma-separated ``pipetasks`` value.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels : `list` [`str`]
        Dependency ordered list of cluster labels (includes
        single quantum clusters).
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # Mirror the task graph using plain labels, since the cluster
    # definitions refer to tasks by label rather than by TaskDef.
    label_graph = DiGraph()
    for task_def in task_graph:
        label_graph.add_node(task_def.label)
        for parent_def in task_graph.predecessors(task_def):
            label_graph.add_edge(parent_def.label, task_def.label)

    cluster_of_task = {}  # task label -> label of the cluster containing it
    claimed_labels = set()  # task labels already assigned to a cluster
    cluster_graph = DiGraph()  # graph of clusters, used for the cycle check
    ordered_tasks = {}  # cluster label to ordered list of task labels

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        requested = [piece.strip() for piece in cluster_config[cluster_label]["pipetasks"].split(",")]
        present = []
        for task_label in requested:
            if task_label in claimed_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Labels that are not part of this graph are ignored entirely.
            if not label_graph.has_node(task_label):
                continue
            present.append(task_label)
            claimed_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label

        # A cluster definition matters only if at least one of its tasks
        # is in the graph.
        if not present:
            continue

        # Record the cluster's tasks in dependency order.
        ordered_tasks[cluster_label] = list(topological_sort(label_graph.subgraph(present)))
        cluster_graph.add_node(cluster_label)

    # Every remaining task becomes a cluster of its own, named after the task.
    for task_label in label_graph:
        if task_label in claimed_labels:
            continue
        cluster_of_task[task_label] = task_label
        cluster_graph.add_node(task_label)
        ordered_tasks[task_label] = [task_label]

    # Task edges crossing a cluster boundary become cluster dependencies.
    for edge in task_graph.edges:
        parent_cluster = cluster_of_task[edge[0].label]
        child_cluster = cluster_of_task[edge[1].label]
        if parent_cluster != child_cluster:
            cluster_graph.add_edge(parent_cluster, child_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks
def dimension_clustering(config, qgraph, name):
    """Cluster a QuantumGraph according to the ``cluster`` config section.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.  Must provide ``.bps_defined.runQgraphFile`` and
        a ``cluster`` section.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Shared, mutated by the helpers: records which cluster each quantum
    # ended up in so dependencies can be created.
    quantum_to_cluster = {}

    cluster_section = config["cluster"]
    cluster_labels, ordered_tasks = _check_clusters_tasks(cluster_section, qgraph.taskGraph)

    # Labels arrive in dependency order.
    for cluster_label in cluster_labels:
        _LOG.debug("cluster = %s", cluster_label)

        # A label without a definition in the cluster section is a task
        # that was not placed in any configured cluster.
        if cluster_label not in cluster_section:
            add_clusters_per_quantum(config, cluster_label, qgraph, cqgraph, quantum_to_cluster)
            continue

        add_dim_clusters(
            cluster_config=cluster_section[cluster_label],
            cluster_label=cluster_label,
            qgraph=qgraph,
            ordered_tasks=ordered_tasks,
            cqgraph=cqgraph,
            quantum_to_cluster=quantum_to_cluster,
        )

    return cqgraph
def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Add one single-quantum cluster per quantum of a task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.  Searched for an optional ``templateDataId``
        for the given task.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added.  (modified in method)
    quantum_to_cluster : `dict`
        Mapping of quantum node id to the name of the cluster it was
        added to.  (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    search_opts = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
    found, template_data_id = config.search("templateDataId", opt=search_opts)
    name_template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"

    # Resolve the label to its task definition, then walk that task's
    # quantum nodes.
    task_def = qgraph.findTaskDefByLabel(label)
    for qnode in qgraph.getNodesForTask(task_def):
        cluster = QuantaCluster.from_quantum_node(qnode, name_template)
        cqgraph.add_cluster(cluster)
        # The mapping must contain this quantum before its dependencies
        # are resolved.
        quantum_to_cluster[qnode.nodeId] = cluster.name
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)
def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    Quanta of the tasks belonging to ``cluster_label`` are grouped by the
    values of the configured ``dimensions``; every distinct cluster name
    produced by the name template becomes one cluster.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.  Keys read here:
        ``dimensions`` (comma-separated dimension names, optional),
        ``clusterTemplate`` (optional name template) and
        ``equalDimensions`` (optional comma-separated ``dim1:dim2`` pairs).
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new clusters are added.
        (modified in method)
    quantum_to_cluster : `dict`
        Mapping of quantum node id to the name of the cluster it was
        added to.  (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum lacks a value for one of the cluster
        dimensions, even after applying ``equalDimensions``.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # The equalDimensions setting does not depend on the task or quantum,
    # so it is parsed once up front (a malformed value fails immediately
    # instead of at the first quantum).
    equal_pairs = []
    equal_dims = cluster_config.get("equalDimensions", None)
    if equal_dims:
        for pair in equal_dims.split(","):
            pair = pair.strip()
            if not pair:
                # Tolerate a stray or trailing comma.
                continue
            dim1, dim2 = pair.split(":")
            equal_pairs.append((dim1.strip(), dim2.strip()))

    # Clusters touched in this call, keyed by name so that each cluster
    # has its dependencies computed exactly once, in first-seen order.
    new_clusters = {}
    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue

        # Determine cluster for each node
        for qnode in qgraph.getNodesForTask(task_def):
            # Gather info for cluster name template into a dictionary.
            info = {}
            missing_info = set()
            data_id_info = qnode.quantum.dataId.full.byName()
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            # Fill in a cluster dimension this quantum lacks from the
            # dimension declared equal to it.  Only dimensions that are
            # actually missing are substituted: a value taken directly
            # from the data id is never overwritten, and removing from
            # the set can no longer fail for an absent member.
            for dim1, dim2 in equal_pairs:
                if dim1 in missing_info and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    missing_info.discard(dim1)
                elif dim2 in missing_info and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.discard(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                # Sorted so the message is stable from run to run.
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions: "
                    f"{','.join(sorted(missing_info))}; "
                    f"required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # To avoid key errors from generic patterns, use defaultdict.
            cluster_name = template.format_map(defaultdict(lambda: "", info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = re.sub("/", "_", cluster_name)
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            new_clusters.setdefault(cluster_name, cluster)

    # Dependencies are added only after every quantum of this cluster label
    # has been placed, so quanta sharing a cluster are all known by now.
    for cluster in new_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)
def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Add dependencies for a cluster within a ClusteredQuantumGraph.

    For every quantum in the cluster, each parent quantum that lives in a
    different cluster produces a dependency from the parent's cluster to
    this quantum's cluster.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies are added.
        Its ``qgraph`` attribute is used to look up parent quanta.
        (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict`
        Mapping of quantum node id to the name of the cluster it was
        added to.  Only read here.

    Raises
    ------
    KeyError
        Raised if any of the cluster's quantum node ids that have parents
        are missing from quantum_to_cluster or if their parent quantum
        node ids are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for node_id in cluster.qgraph_node_ids:
        parents = qgraph.determineInputsToQuantumNode(qgraph.getQuantumNodeByNodeId(node_id))
        for parent in parents:
            # Only the mapping lookups are guarded: the handler treats the
            # missing key as a quantum node id, which holds only for a
            # KeyError raised by these two lookups.  An error raised by
            # add_dependency itself propagates untouched.
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[node_id]
            except KeyError as exc:  # pragma: no cover
                # For debugging a problem internal to method
                missing_node = qgraph.getQuantumNodeByNodeId(exc.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    missing_node.taskDef.label,
                    missing_node.quantum.dataId.full.byName(),
                )
                raise

            # Quanta inside the same cluster need no cluster-level edge.
            if parent_cluster != child_cluster:
                cqgraph.add_dependency(parent_cluster, child_cluster)