Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%
148 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-28 09:41 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-28 09:41 +0000
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Functions that convert QuantumGraph into ClusteredQuantumGraph.
29"""
30import logging
31import re
32from collections import defaultdict
34from networkx import DiGraph, is_directed_acyclic_graph, topological_sort
36from . import ClusteredQuantumGraph, QuantaCluster
38_LOG = logging.getLogger(__name__)
def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph that has exactly one quantum per cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quanta each become their own cluster.
    name : `str`
        Name to give to the ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph holding one single-quantum cluster for every
        node of the given QuantumGraph, with a dependency between two
        clusters wherever the QuantumGraph reports one quantum as an
        output of the other.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster name template per task label, so the config is searched only
    # once for each label.
    template_by_label = {}

    # Cluster name per quantum node id; consumed by the dependency pass.
    cluster_name_by_node = {}

    # First pass: wrap every quantum in its own cluster.
    for quantum_node in qgraph:
        task_label = quantum_node.taskDef.label
        if task_label not in template_by_label:
            has_data_id_template, data_id_template = config.search(
                "templateDataId",
                opt={"curvals": {"curr_pipetask": task_label}, "replaceVars": False},
            )
            template_by_label[task_label] = (
                ("{node_number}_{label}_" + data_id_template) if has_data_id_template else "{node_number}"
            )

        new_cluster = QuantaCluster.from_quantum_node(quantum_node, template_by_label[task_label])
        cluster_name_by_node[quantum_node.nodeId] = new_cluster.name
        clustered.add_cluster(new_cluster)

    # Second pass: copy every parent -> child edge of the QuantumGraph onto
    # the corresponding clusters.
    for parent_node in qgraph:
        for child_node in qgraph.determineOutputsOfQuantumNode(parent_node):
            clustered.add_dependency(
                cluster_name_by_node[parent_node.nodeId],
                cluster_name_by_node[child_node.nodeId],
            )

    return clustered
def _check_clusters_tasks(cluster_config, task_graph):
    """Validate the cluster definitions against the pipeline's task graph.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels: `list` [`str`]
        Dependency ordered list of cluster labels (includes
        single quantum clusters).
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # Mirror the task graph using plain labels; the TaskGraph itself is
    # keyed by TaskDef objects, which are awkward to match against config.
    label_graph = DiGraph()
    for task_def in task_graph:
        label_graph.add_node(task_def.label)
        for parent_def in task_graph.predecessors(task_def):
            label_graph.add_edge(parent_def.label, task_def.label)

    cluster_of_task = {}  # task label -> cluster label
    claimed_labels = set()  # task labels already assigned to a cluster
    cluster_graph = DiGraph()  # graph of clusters, used for the cycle check
    ordered_tasks = {}  # cluster label -> dependency-ordered task labels

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks_value = cluster_config[cluster_label]["pipetasks"]
        members_in_qgraph = []
        for task_label in [piece.strip() for piece in pipetasks_value.split(",")]:
            if task_label in claimed_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Labels absent from the QuantumGraph are ignored.
            if not label_graph.has_node(task_label):
                continue
            members_in_qgraph.append(task_label)
            claimed_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label

        if not members_in_qgraph:
            continue

        # Store the cluster's tasks in dependency order.
        # NOTE(review): the cluster node is added only when at least one of
        # its tasks is in the graph; the flattened source does not show the
        # nesting of this statement unambiguously -- confirm.
        member_subgraph = label_graph.subgraph(members_in_qgraph)
        ordered_tasks[cluster_label] = list(topological_sort(member_subgraph))
        cluster_graph.add_node(cluster_label)

    # Every task not claimed above becomes a cluster named after itself.
    for task_label in label_graph:
        if task_label in claimed_labels:
            continue
        cluster_of_task[task_label] = task_label
        cluster_graph.add_node(task_label)
        ordered_tasks[task_label] = [task_label]

    # Project task-level edges onto clusters, skipping intra-cluster edges.
    for edge in task_graph.edges:
        upstream_cluster = cluster_of_task[edge[0].label]
        downstream_cluster = cluster_of_task[edge[1].label]
        if upstream_cluster != downstream_cluster:
            cluster_graph.add_edge(upstream_cluster, downstream_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks
def dimension_clustering(config, qgraph, name):
    """Cluster quanta by dimension values as directed by the configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Quantum node id -> cluster name; filled in by the helpers below and
    # used by them to wire up dependencies between clusters.
    cluster_of_quantum = {}

    cluster_defs = config["cluster"]
    labels_in_order, ordered_tasks = _check_clusters_tasks(cluster_defs, qgraph.taskGraph)
    for current_label in labels_in_order:
        _LOG.debug("cluster = %s", current_label)
        if current_label not in cluster_defs:
            # No cluster definition carries this label, so it is a task
            # label: give each of its quanta its own cluster.
            add_clusters_per_quantum(config, current_label, qgraph, clustered, cluster_of_quantum)
            continue

        add_dim_clusters(
            cluster_defs[current_label],
            current_label,
            qgraph,
            ordered_tasks,
            clustered,
            cluster_of_quantum,
        )

    return clustered
def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Put every quantum of one task into its own cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added. (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    search_opts = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
    has_data_id_template, data_id_template = config.search("templateDataId", opt=search_opts)
    name_template = ("{node_number}_{label}_" + data_id_template) if has_data_id_template else "{node_number}"

    # Resolve the label to its TaskDef, then fetch that task's quantum nodes.
    nodes_of_task = qgraph.getNodesForTask(qgraph.findTaskDefByLabel(label))

    for quantum_node in nodes_of_task:
        single_cluster = QuantaCluster.from_quantum_node(quantum_node, name_template)
        cqgraph.add_cluster(single_cluster)
        # Record the mapping before wiring dependencies, which look the
        # cluster's own quanta up in this dictionary.
        quantum_to_cluster[quantum_node.nodeId] = single_cluster.name
        add_cluster_dependencies(cqgraph, single_cluster, quantum_to_cluster)
def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    Quanta of the tasks belonging to the cluster label are grouped by the
    values of the configured cluster dimensions; each distinct cluster name
    produced from the name template becomes one cluster.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new clusters are added.
        (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum's data ID lacks a dimension required by the
        cluster definition and no ``equalDimensions`` pair supplies it.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # The equalDimensions setting is the same for every task and quantum,
    # so parse it into (dim1, dim2) pairs once instead of per quantum.
    equal_pairs = []
    equal_dims = cluster_config.get("equalDimensions", None)
    if equal_dims:
        for pair in equal_dims.split(","):
            dim1, dim2 = pair.strip().split(":")
            equal_pairs.append((dim1.strip(), dim2.strip()))

    # Keyed by cluster name so each cluster gets exactly one dependency
    # pass, however many quanta it holds; dict order keeps first-seen order.
    new_clusters = {}
    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue

        # Determine cluster for each node
        for qnode in qgraph.getNodesForTask(task_def):
            # Gather info for cluster name template into a dictionary.
            info = {}

            missing_info = set()
            data_id_info = qnode.quantum.dataId.full.byName()
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            for dim1, dim2 in equal_pairs:
                # discard(), not remove(): the cluster dimension may have
                # been present in the data ID already, in which case it was
                # never recorded as missing and remove() would raise.
                if dim1 in cluster_dims and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    missing_info.discard(dim1)
                elif dim2 in cluster_dims and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.discard(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions: "
                    f"{','.join(sorted(missing_info))}; "
                    f"required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # Placeholders without a value render as empty strings rather
            # than raising, so generic templates can be shared.
            cluster_name = template.format_map(defaultdict(str, info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = cluster_name.replace("/", "_")
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            new_clusters[cluster_name] = cluster

    # Dependencies are added only after every quantum has been assigned, so
    # edges internal to a cluster can be recognised and skipped.
    for cluster in new_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)
def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Connect a cluster to the clusters holding its quanta's parents.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies are added.
        (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.

    Raises
    ------
    KeyError :
        Raised if any of the cluster's quantum node ids are missing
        from quantum_to_cluster or if their parent quantum node ids
        are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for child_id in cluster.qgraph_node_ids:
        child_node = qgraph.getQuantumNodeByNodeId(child_id)
        for parent_node in qgraph.determineInputsToQuantumNode(child_node):
            try:
                parent_cluster_name = quantum_to_cluster[parent_node.nodeId]
                child_cluster_name = quantum_to_cluster[child_id]
                # A parent inside the same cluster needs no dependency.
                if parent_cluster_name != child_cluster_name:
                    cqgraph.add_dependency(parent_cluster_name, child_cluster_name)
            except KeyError as err:  # pragma: no cover
                # Debugging aid: log which quantum the missing key refers
                # to, then let the original error propagate.
                missing_node = qgraph.getQuantumNodeByNodeId(err.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    missing_node.taskDef.label,
                    missing_node.quantum.dataId.full.byName(),
                )
                raise