Coverage for python/lsst/pipe/base/pipeline_graph/visualization/_merge.py: 27%

105 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-24 10:01 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ( 

30 "MergedNodeKey", 

31 "merge_graph_input_trees", 

32 "merge_graph_output_trees", 

33 "merge_graph_intermediates", 

34) 

35 

36import dataclasses 

37from collections import defaultdict 

38from typing import Any, Iterable, TypeVar 

39 

40import networkx 

41import networkx.algorithms.dag 

42import networkx.algorithms.tree 

43from lsst.daf.butler import DimensionGroup 

44 

45from .._nodes import NodeKey, NodeType 

46from ._options import NodeAttributeOptions 

47 

48_P = TypeVar("_P") 

49_C = TypeVar("_C") 

50 

51 

class MergedNodeKey(frozenset[NodeKey]):
    """Graph node key that stands in for several merged nodes.

    An instance is the frozen set of `NodeKey` objects for the similar tasks
    or dataset types that were collapsed into a single NetworkX node in order
    to simplify graph visualization.
    """

    def __str__(self) -> str:
        # Sort the member names (descending) so the label is deterministic
        # rather than dependent on set iteration order.
        return ", ".join(sorted((str(key) for key in self), reverse=True))

    @property
    def node_type(self) -> NodeType:
        """Enum value for whether this is a task, task initialization, or
        dataset type node, taken from one arbitrary member of the set.
        """
        representative = next(iter(self))
        return representative.node_type

68 

69 

def merge_graph_input_trees(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions, depth: int
) -> None:
    """Merge trees of overall-input dataset type nodes and/or
    beginning-of-pipeline task nodes that have similar properties and the same
    structure.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Selects which node attributes must match for two trees to be
        considered similar enough to merge.  In this module only
        ``options.storage_classes`` and ``options.task_classes`` are
        consulted, each by truthiness; node dimensions are always compared.
        This is typically the same set of options that controls whether to
        display these attributes in the graph visualization.
    depth : `int`
        How many nodes to traverse from the beginning of the graph before
        terminating the merging algorithm.
    """
    # Stage one finds the groups of mergeable trees; stage two rewrites the
    # graph accordingly.
    _apply_tree_merges(xgraph, _make_tree_merge_groups(xgraph, options, depth))

95 

96 

def merge_graph_output_trees(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions, depth: int
) -> None:
    """Merge trees of overall-output dataset type nodes and/or
    end-of-pipeline task nodes that have similar properties and the same
    structure.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Selects which node attributes must match for two trees to be
        considered similar enough to merge.  In this module only
        ``options.storage_classes`` and ``options.task_classes`` are
        consulted, each by truthiness; node dimensions are always compared.
        This is typically the same set of options that controls whether to
        display these attributes in the graph visualization.
    depth : `int`
        How many nodes to traverse from the end of the graph (the search
        runs over a reversed view of ``xgraph``) before terminating the
        merging algorithm.
    """
    # Search a reversed view (no copy) so the input-tree search finds output
    # trees; the merges themselves are applied to the original graph.
    reversed_view = xgraph.reverse(copy=False)
    tree_groups = _make_tree_merge_groups(reversed_view, options, depth)
    _apply_tree_merges(xgraph, tree_groups)

122 

123 

def merge_graph_intermediates(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph, options: NodeAttributeOptions
) -> None:
    """Merge parallel interior nodes of a graph with similar properties.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    options : `NodeAttributeOptions`
        Selects which node attributes must match for nodes to be considered
        similar enough to merge.  In this module only
        ``options.storage_classes`` and ``options.task_classes`` are
        consulted, each by truthiness; node dimensions are always compared.
        This is typically the same set of options that controls whether to
        display these attributes in the graph visualization.

    Notes
    -----
    "Parallel" nodes here are nodes that have the exact same predecessors and
    successors.  Nodes with no predecessors or no successors are never
    merged by this function.
    """
    # Bucket nodes by their compared attributes and exact neighbor sets.
    candidates: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
    for node_key, node_state in xgraph.nodes.items():
        key = _MergeKey.from_node_state(
            node_state, xgraph.predecessors(node_key), xgraph.successors(node_key), options
        )
        # Only interior nodes (with both predecessors and successors) qualify.
        if not key.parents or not key.children:
            continue
        candidates[key].add(node_key)
    # Maps each already-merged original node to the node that replaced it, so
    # later groups connect to the merged node instead of a removed one.
    merged_into: dict[NodeKey, MergedNodeKey] = {}
    for key, group in candidates.items():
        if len(group) <= 1:
            continue
        merged_key = MergedNodeKey(frozenset(group))
        xgraph.add_node(
            merged_key,
            storage_class_name=key.storage_class_name,
            task_class_name=key.task_class_name,
            dimensions=key.dimensions,
        )
        xgraph.add_edges_from((merged_into.get(parent, parent), merged_key) for parent in key.parents)
        xgraph.add_edges_from((merged_key, merged_into.get(child, child)) for child in key.children)
        for original in group:
            merged_into[original] = merged_key
        xgraph.remove_nodes_from(group)

175 

176 

@dataclasses.dataclass(frozen=True)
class _MergeKey:
    """Hashable key used by the merge algorithms to group nodes.

    Nodes (or trees of nodes) whose keys compare equal are candidates for
    being merged into a single node.
    """

    parents: frozenset[Any]
    """Nodes of the original graph that are successors or predecessors
    (depending on the orientation of the algorithm) of the nodes being
    considered for merging.
    """

    dimensions: DimensionGroup | None
    """Dimensions of the nodes being considered for merging, as read from the
    node state; `None` if the node state has no ``"dimensions"`` entry.
    """

    storage_class_name: str | None
    """Storage class of the nodes being considered for merging, or `None` if
    storage classes are not included in the similarity criteria or this is a
    task or task initialization node group.
    """

    task_class_name: str | None
    """Name of the task class for the nodes being considered for merging, or
    `None` if task classes are not included in the similarity criteria or
    this is a dataset type node group.
    """

    children: frozenset[Any]
    """Nodes that are predecessors or successors (the opposite of
    ``parents``) of the nodes being considered for merging.

    In the `merge_graph_intermediates` algorithm, these are regular unmerged
    nodes.  In the `merge_graph_input_trees` or `merge_graph_output_trees`
    algorithms, these are more `_MergeKey` instances, representing
    already-processed trees.
    """

    @classmethod
    def from_node_state(
        cls,
        state: dict[str, Any],
        parents: Iterable[_P],
        children: Iterable[_C],
        options: NodeAttributeOptions,
    ) -> _MergeKey:
        """Construct from a NetworkX node attribute state dictionary.

        Parameters
        ----------
        state : `dict`
            Dictionary used to hold NetworkX node attributes.
        parents : `~collections.abc.Iterable`
            Predecessor or successor nodes (depending on the orientation of
            the algorithm).
        children : `~collections.abc.Iterable`
            Successor or predecessor nodes (depending on the orientation of
            the algorithm).
        options : `NodeAttributeOptions`
            Options for which node attributes to include in the new key.
            Only the truthiness of ``storage_classes`` and ``task_classes``
            is consulted; dimensions are always included.

        Returns
        -------
        key : `_MergeKey`
            New key holding frozen copies of ``parents`` and ``children`` and
            the selected attributes of ``state``.
        """
        if options.storage_classes:
            storage_class_name = state.get("storage_class_name")
        else:
            storage_class_name = None
        if options.task_classes:
            task_class_name = state.get("task_class_name")
        else:
            task_class_name = None
        return cls(
            parents=frozenset(parents),
            dimensions=state.get("dimensions"),
            storage_class_name=storage_class_name,
            task_class_name=task_class_name,
            children=frozenset(children),
        )

245 

246 

def _make_tree_merge_groups(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph,
    options: NodeAttributeOptions,
    depth: int,
) -> list[dict[_MergeKey, set[NodeKey]]]:
    """First-stage implementation of `merge_graph_input_trees` and
    (when run on the reversed graph) `merge_graph_output_trees`.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to search for mergeable trees; only read, never modified.
    options : `NodeAttributeOptions`
        Options forwarded to `_MergeKey.from_node_state` to select which
        node attributes take part in the comparison.
    depth : `int`
        Limit on how far from the first topological generation the search
        proceeds; ``0`` disables the search entirely.

    Returns
    -------
    groups : `list` [ `dict` [ `_MergeKey`, `set` [ `NodeKey` ] ] ]
        One mapping per tree depth, from merge key to the root nodes of the
        trees that share that key.  The first element is always an empty
        `dict` representing "0-depth trees".
    """
    # Our goal is to obtain mappings that group trees of nodes by the
    # attributes in a _MergeKey.  The nested dictionaries are the root of a
    # tree and the nodes under that root, recursively (but not including the
    # root).  We nest these mappings inside a list, with each mapping
    # corresponding to a different depth for the trees it represents.  We
    # start with a special empty dict for "0-depth trees", since that makes
    # result[depth] valid and hence off-by-one errors less likely.
    result: list[dict[_MergeKey, set[NodeKey]]] = [{}]
    if depth == 0:
        return result
    # We start with the nodes that have no predecessors in the graph.  An
    # empty graph has no generations at all, so default to an empty one
    # instead of letting a bare StopIteration escape to the caller.
    first_generation = next(networkx.algorithms.dag.topological_generations(xgraph), ())
    # Each initial node is a valid single-node tree with no children; give
    # every node its own empty dict rather than sharing one mutable object.
    current_candidates: dict[NodeKey, dict[NodeKey, _MergeKey]] = {node: {} for node in first_generation}
    # Set up an outer loop over tree depth; we'll construct a new set of
    # candidates at each iteration.
    while current_candidates:
        # As we go, we'll remember nodes that have just one successor, as
        # those successors might be the roots of slightly taller trees.
        # We store the current nodes and their merge keys under them.
        next_candidates: dict[NodeKey, dict[NodeKey, _MergeKey]] = defaultdict(dict)
        # We also want to track the nodes one level up that are not tree
        # roots, because some current node has both them and some other node
        # as a successor.
        nontrees: set[NodeKey] = set()
        # Make a dictionary for the results at this depth, then start the
        # inner iteration over candidates and (after the first iteration)
        # their children.
        result_for_depth: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
        for node, children in current_candidates.items():
            # Make a _MergeKey for this node and add it to the results for
            # this depth.  Two nodes with the same _MergeKey are roots of
            # isomorphic trees that have the same successor(s), and can be
            # merged (with isomorphism defined as both structure and
            # whatever comparisons are in 'options').
            merge_key = _MergeKey.from_node_state(
                xgraph.nodes[node], xgraph.successors(node), children.values(), options
            )
            result_for_depth[merge_key].add(node)
            # NOTE(review): with ``<=`` the returned list can grow to
            # ``depth + 2`` entries (one level beyond ``result[depth]``);
            # confirm whether ``<`` was intended before changing it.
            if len(result) <= depth:
                # See if this node's successor might be the root of a
                # larger tree.
                if len(merge_key.parents) == 1:
                    (parent,) = merge_key.parents
                    # Drop the parent from the stored key so that subtrees
                    # under different roots can still compare equal.
                    next_candidates[parent][node] = dataclasses.replace(merge_key, parents=frozenset())
                else:
                    nontrees.update(merge_key.parents)
        # Append the results for this depth.
        result.append(result_for_depth)
        # Trim out candidates that aren't trees after all.
        for nontree_node in nontrees & next_candidates.keys():
            del next_candidates[nontree_node]
        current_candidates = next_candidates
    return result

312 

313 

def _apply_tree_merges(
    xgraph: networkx.DiGraph | networkx.MultiDiGraph,
    groups: list[dict[_MergeKey, set[NodeKey]]],
) -> None:
    """Second-stage implementation of `merge_graph_input_trees` and
    `merge_graph_output_trees`.

    Parameters
    ----------
    xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
        Graph to be processed; modified in-place.
    groups : `list` [ `dict` [ `_MergeKey`, `set` [ `NodeKey` ] ] ]
        Groups of mergeable tree roots, indexed by tree depth, as returned
        by `_make_tree_merge_groups`.
    """
    # Maps every original node that has been merged to its replacement.
    merged_into: dict[NodeKey, MergedNodeKey] = {}
    # Process the last (deepest) entries first so that, by the time a group
    # is examined, any of its parents that were merged are already recorded.
    for depth_groups in reversed(groups):
        # Re-key the groups in terms of already-merged parents; groups whose
        # parents were merged together may now share a key.
        regrouped: dict[_MergeKey, set[NodeKey]] = defaultdict(set)
        for key, roots in depth_groups.items():
            if not key.parents.isdisjoint(merged_into):
                key = dataclasses.replace(
                    key, parents=frozenset(merged_into.get(p, p) for p in key.parents)
                )
            regrouped[key].update(roots)
        for key, roots in regrouped.items():
            if len(roots) <= 1:
                continue
            merged_key = MergedNodeKey(frozenset(roots))
            rewired: set[tuple[NodeKey | MergedNodeKey, NodeKey | MergedNodeKey]] = set()
            for root in roots:
                merged_into[root] = merged_key
                # Redirect every edge touching this member to the merged
                # nodes known so far.
                for edge_view in (xgraph.in_edges(root), xgraph.out_edges(root)):
                    rewired.update((merged_into.get(a, a), merged_into.get(b, b)) for a, b in edge_view)
            xgraph.add_node(
                merged_key,
                storage_class_name=key.storage_class_name,
                task_class_name=key.task_class_name,
                dimensions=key.dimensions,
            )
            xgraph.add_edges_from(rewired)
    # Drop all replaced originals (and their now-redundant edges) at the end.
    xgraph.remove_nodes_from(merged_into)