Coverage for python/lsst/pipe/base/pipeline_graph/visualization/_merge.py: 27%
105 statements
coverage.py v7.5.1, created at 2024-05-07 02:48 -0700
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "MergedNodeKey",
    "merge_graph_input_trees",
    "merge_graph_output_trees",
    "merge_graph_intermediates",
)

import dataclasses
from collections import defaultdict
from typing import Any, Iterable, TypeVar

import networkx
import networkx.algorithms.dag
import networkx.algorithms.tree
from lsst.daf.butler import DimensionGroup

from .._nodes import NodeKey, NodeType
from ._options import NodeAttributeOptions

_P = TypeVar("_P")
_C = TypeVar("_C")


class MergedNodeKey(frozenset[NodeKey]):
    """A key for NetworkX graph nodes that represent multiple similar tasks
    or dataset types that have been merged to simplify graph visualization.
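
    Examples
    --------
    A minimal sketch of the string form, using plain strings in place of real
    `NodeKey` instances (the real keys come from the pipeline graph itself):

    >>> key = MergedNodeKey({"b", "a"})
    >>> str(key)
    'b, a'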
    """

    def __str__(self) -> str:
        members = [str(k) for k in self]
        members.sort(reverse=True)
        return ", ".join(members)

    @property
    def node_type(self) -> NodeType:
        """Enum value for whether this is a task, task initialization, or
        dataset type node.
        """
        return next(iter(self)).node_type


def merge_graph_input_trees(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions, depth: int
) -> None:
    """Merge trees of overall-input dataset type nodes and/or
    beginning-of-pipeline task nodes that have similar properties and the same
    structure.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Properties of nodes that should be considered when determining whether
        they are similar enough to be merged. Only the truthiness of
        attributes is considered (e.g. ``options.dimensions == 'full'`` and
        ``options.dimensions == 'concise'`` are both interpreted to mean "only
        merge trees with the same dimensions"). This is typically the same set
        of options that controls whether to display these attributes in the
        graph visualization.
    depth : `int`
        How many nodes to traverse from the beginning of the graph before
        terminating the merging algorithm.
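
    Examples
    --------
    A minimal sketch on a toy graph; the plain-string node keys and the
    `~types.SimpleNamespace` stand-in for `NodeAttributeOptions` are
    illustrative assumptions, not the real pipeline-graph types:

    >>> import networkx
    >>> from types import SimpleNamespace
    >>> g = networkx.DiGraph([("in1", "task"), ("in2", "task")])
    >>> options = SimpleNamespace(storage_classes=False, task_classes=False, dimensions=False)
    >>> merge_graph_input_trees(g, options, depth=1)
    >>> sorted(str(node) for node in g)
    ['in2, in1', 'task']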
    """
    groups = _make_tree_merge_groups(xgraph, options, depth)
    _apply_tree_merges(xgraph, groups)


def merge_graph_output_trees(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions, depth: int
) -> None:
    """Merge trees of overall-output dataset type nodes and/or
    end-of-pipeline task nodes that have similar properties and the same
    structure.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Properties of nodes that should be considered when determining whether
        they are similar enough to be merged. Only the truthiness of
        attributes is considered (e.g. ``options.dimensions == 'full'`` and
        ``options.dimensions == 'concise'`` are both interpreted to mean "only
        merge trees with the same dimensions"). This is typically the same set
        of options that controls whether to display these attributes in the
        graph visualization.
    depth : `int`
        How many nodes to traverse from the end of the graph before
        terminating the merging algorithm.
    """
    groups = _make_tree_merge_groups(xgraph.reverse(copy=False), options, depth)
    _apply_tree_merges(xgraph, groups)


def merge_graph_intermediates(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions
) -> None:
    """Merge parallel interior nodes of a graph with similar properties.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Properties of nodes that should be considered when determining whether
        they are similar enough to be merged. Only the truthiness of
        attributes is considered (e.g. ``options.dimensions == 'full'`` and
        ``options.dimensions == 'concise'`` are both interpreted to mean "only
        merge trees with the same dimensions"). This is typically the same set
        of options that controls whether to display these attributes in the
        graph visualization.

    Notes
    -----
    "Parallel" nodes here are nodes that have the exact same predecessors and
    successors.
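
    Examples
    --------
    A minimal sketch on a toy graph; the plain-string node keys and the
    `~types.SimpleNamespace` stand-in for `NodeAttributeOptions` are
    illustrative assumptions, not the real pipeline-graph types:

    >>> import networkx
    >>> from types import SimpleNamespace
    >>> g = networkx.DiGraph()
    >>> g.add_node("b1", task_class_name="MyTask")
    >>> g.add_node("b2", task_class_name="MyTask")
    >>> g.add_edges_from([("a", "b1"), ("a", "b2"), ("b1", "c"), ("b2", "c")])
    >>> options = SimpleNamespace(storage_classes=False, task_classes=True, dimensions=False)
    >>> merge_graph_intermediates(g, options)
    >>> sorted(str(node) for node in g)
    ['a', 'b2, b1', 'c']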
    """
    groups: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
    for node, state in xgraph.nodes.items():
        merge_key = _MergeKey.from_node_state(
            state,
            xgraph.predecessors(node),
            xgraph.successors(node),
            options,
        )
        if merge_key.parents and merge_key.children:
            groups[merge_key].add(node)
    replacements: dict[NodeKey, MergedNodeKey] = {}
    for merge_key, members in groups.items():
        if len(members) < 2:
            continue
        new_node_key = MergedNodeKey(frozenset(members))
        xgraph.add_node(
            new_node_key,
            storage_class_name=merge_key.storage_class_name,
            task_class_name=merge_key.task_class_name,
            dimensions=merge_key.dimensions,
        )
        for parent in merge_key.parents:
            xgraph.add_edge(replacements.get(parent, parent), new_node_key)
        for child in merge_key.children:
            xgraph.add_edge(new_node_key, replacements.get(child, child))
        for member in members:
            replacements[member] = new_node_key
        xgraph.remove_nodes_from(members)


@dataclasses.dataclass(frozen=True)
class _MergeKey:
    """A helper class for merge algorithms that is used as a dictionary key
    when grouping nodes that may be merged by their attributes.
    """

    parents: frozenset[Any]
    """Nodes of the original graph that are successors or predecessors of
    the nodes being considered for merging.
    """

    dimensions: DimensionGroup | None
    """Dimensions of the nodes being considered for merging, or `None` if
    dimensions are not included in the similarity criteria.
    """

    storage_class_name: str | None
    """Storage class of the nodes being considered for merging, or `None` if
    storage classes are not included in the similarity criteria or this is a
    task or task initialization node group.
    """

    task_class_name: str | None
    """Name of the task class for the nodes being considered for merging, or
    `None` if task classes are not included in the similarity criteria or
    this is a dataset type node group.
    """

    children: frozenset[Any]
    """Nodes that are predecessors or successors (the opposite of ``parents``)
    of the nodes being considered for merging.

    In the `merge_graph_intermediates` algorithm, these are regular unmerged
    nodes. In the `merge_graph_input_trees` and `merge_graph_output_trees`
    algorithms, these are instead `_MergeKey` instances, representing
    already-processed trees.
    """

    @classmethod
    def from_node_state(
        cls,
        state: dict[str, Any],
        parents: Iterable[_P],
        children: Iterable[_C],
        options: NodeAttributeOptions,
    ) -> _MergeKey:
        """Construct from a NetworkX node attribute state dictionary.

        Parameters
        ----------
        state : `dict`
            Dictionary used to hold NetworkX node attributes.
        parents : `~collections.abc.Iterable` [ `NodeKey` ]
            Predecessor or successor nodes (depending on the orientation of
            the algorithm).
        children : `~collections.abc.Iterable`
            Successor or predecessor nodes (depending on the orientation of
            the algorithm).
        options : `NodeAttributeOptions`
            Options for which node attributes to include in the new key.
        """
        return cls(
            parents=frozenset(parents),
            dimensions=state.get("dimensions"),
            storage_class_name=(state.get("storage_class_name") if options.storage_classes else None),
            task_class_name=(state.get("task_class_name") if options.task_classes else None),
            children=frozenset(children),
        )


def _make_tree_merge_groups(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph,
    options: NodeAttributeOptions,
    depth: int,
) -> list[dict[_MergeKey, set[NodeKey]]]:
    """First-stage implementation of `merge_graph_input_trees` and
    (when run on the reversed graph) `merge_graph_output_trees`.
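
    Examples
    --------
    A sketch of the returned structure on a toy graph; the plain-string node
    keys and the `~types.SimpleNamespace` stand-in for `NodeAttributeOptions`
    are illustrative assumptions, not the real pipeline-graph types:

    >>> import networkx
    >>> from types import SimpleNamespace
    >>> g = networkx.DiGraph([("in1", "task"), ("in2", "task")])
    >>> options = SimpleNamespace(storage_classes=False, task_classes=False, dimensions=False)
    >>> groups = _make_tree_merge_groups(g, options, depth=1)
    >>> [sorted(members) for d in groups for members in d.values()]
    [['in1', 'in2'], ['task']]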
    """
    # Our goal is to obtain mappings that group trees of nodes by the
    # attributes in a _MergeKey: each key describes a tree (the root's own
    # attributes and successors, plus, via the nested keys in 'children',
    # the nodes under that root, recursively), and each value is the set of
    # roots of trees matching that description. We nest these mappings
    # inside a list, with each mapping corresponding to a different depth
    # for the trees it represents. We start with a special empty dict for
    # "0-depth trees", since that makes result[depth] valid and hence
    # off-by-one errors less likely.
    result: list[dict[_MergeKey, set[NodeKey]]] = [{}]
    if depth == 0:
        return result
    # We start with the nodes that have no predecessors in the graph.
    # Ignore for now the fact that the 'current_candidates' data structure
    # we process is actually a dict that associates each of those nodes
    # with an empty dict. All of these initial nodes are valid trees,
    # since they're just single nodes.
    first_generation = next(networkx.algorithms.dag.topological_generations(xgraph))
    current_candidates: dict[NodeKey, dict[NodeKey, _MergeKey]] = dict.fromkeys(first_generation, {})
    # Set up an outer loop over tree depth; we'll construct a new set of
    # candidates at each iteration.
    while current_candidates:
        # As we go, we'll remember nodes that have just one successor, as
        # that successor might be the root of a slightly taller tree.
        # Under each such candidate root we store its child nodes and their
        # merge keys.
        next_candidates: dict[NodeKey, dict[NodeKey, _MergeKey]] = defaultdict(dict)
        # We also want to track the candidate roots one level up that are
        # not trees after all, because some node has both them and some
        # other node as a successor.
        nontrees: set[NodeKey] = set()
        # Make a dictionary for the results at this depth, then start the
        # inner iteration over candidates and (after the first iteration)
        # their children.
        result_for_depth: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
        for node, children in current_candidates.items():
            # Make a _MergeKey for this node and add it to the results for
            # this depth. Two nodes with the same _MergeKey are roots of
            # isomorphic trees that have the same successor(s), and can be
            # merged (with isomorphism defined as both structure and
            # whatever comparisons are in 'options').
            merge_key = _MergeKey.from_node_state(
                xgraph.nodes[node], xgraph.successors(node), children.values(), options
            )
            result_for_depth[merge_key].add(node)
            if len(result) <= depth:
                # See if this node's successor might be the root of a
                # larger tree.
                if len(merge_key.parents) == 1:
                    (parent,) = merge_key.parents
                    next_candidates[parent][node] = dataclasses.replace(merge_key, parents=frozenset())
                else:
                    nontrees.update(merge_key.parents)
        # Append the results for this depth.
        result.append(result_for_depth)
        # Trim out candidates that aren't trees after all.
        for nontree_node in nontrees & next_candidates.keys():
            del next_candidates[nontree_node]
        current_candidates = next_candidates
    return result


def _apply_tree_merges(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph,
    groups: list[dict[_MergeKey, set[NodeKey]]],
) -> None:
    """Second-stage implementation of `merge_graph_input_trees` and
    `merge_graph_output_trees`.
    """
    replacements: dict[NodeKey, MergedNodeKey] = {}
    for group in reversed(groups):
        new_group: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
        for merge_key, members in group.items():
            if merge_key.parents & replacements.keys():
                replaced_parents = frozenset(replacements.get(p, p) for p in merge_key.parents)
                new_group[dataclasses.replace(merge_key, parents=replaced_parents)].update(members)
            else:
                new_group[merge_key].update(members)
        for merge_key, members in new_group.items():
            if len(members) < 2:
                continue
            new_node_key = MergedNodeKey(frozenset(members))
            new_edges: set[tuple[NodeKey | MergedNodeKey, NodeKey | MergedNodeKey]] = set()
            for member_key in members:
                replacements[member_key] = new_node_key
                new_edges.update(
                    (replacements.get(a, a), replacements.get(b, b)) for a, b in xgraph.in_edges(member_key)
                )
                new_edges.update(
                    (replacements.get(a, a), replacements.get(b, b)) for a, b in xgraph.out_edges(member_key)
                )
            xgraph.add_node(
                new_node_key,
                storage_class_name=merge_key.storage_class_name,
                task_class_name=merge_key.task_class_name,
                dimensions=merge_key.dimensions,
            )
            xgraph.add_edges_from(new_edges)
    xgraph.remove_nodes_from(replacements.keys())