Coverage for python/lsst/pipe/base/graph/_implDetails.py: 29%

50 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-02 03:31 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("_DatasetTracker", "DatasetTypeName") 

30 

31from collections import defaultdict 

32from typing import NewType 

33 

34import networkx as nx 

35 

36from ..pipeline import TaskDef 

37 

38# NewTypes 

39DatasetTypeName = NewType("DatasetTypeName", str) 

40 

41 

42class _DatasetTracker: 

43 r"""A container for tracking the relationships between tasks and dataset 

44 types. 

45 

46 Prameters 

47 --------- 

48 createInverse : bool 

49 When adding a key associated with a producer or consumer, also create 

50 and inverse mapping that allows looking up all the keys associated with 

51 some value. Defaults to False. 

52 """ 

53 

54 def __init__(self, createInverse: bool = False): 

55 self._producers: dict[DatasetTypeName, TaskDef] = {} 

56 self._consumers: defaultdict[DatasetTypeName, set[TaskDef]] = defaultdict(set) 

57 self._createInverse = createInverse 

58 if self._createInverse: 

59 self._itemsDict: defaultdict[TaskDef, set[DatasetTypeName]] = defaultdict(set) 

60 

61 def addProducer(self, key: DatasetTypeName, value: TaskDef) -> None: 

62 """Add a key which is produced by some value. 

63 

64 Parameters 

65 ---------- 

66 key : `~typing.TypeVar` 

67 The type to track. 

68 value : `~typing.TypeVar` 

69 The type associated with the production of the key. 

70 

71 Raises 

72 ------ 

73 ValueError 

74 Raised if key is already declared to be produced by another value. 

75 """ 

76 if (existing := self._producers.get(key)) is not None and existing != value: 

77 raise ValueError(f"Only one node is allowed to produce {key}, the current producer is {existing}") 

78 self._producers[key] = value 

79 if self._createInverse: 

80 self._itemsDict[value].add(key) 

81 

82 def addConsumer(self, key: DatasetTypeName, value: TaskDef) -> None: 

83 """Add a key which is consumed by some value. 

84 

85 Parameters 

86 ---------- 

87 key : `~typing.TypeVar` 

88 The type to track. 

89 value : `~typing.TypeVar` 

90 The type associated with the consumption of the key. 

91 """ 

92 self._consumers[key].add(value) 

93 if self._createInverse: 

94 self._itemsDict[value].add(key) 

95 

96 def getConsumers(self, key: DatasetTypeName) -> set[TaskDef]: 

97 """Return all values associated with the consumption of the supplied 

98 key. 

99 

100 Parameters 

101 ---------- 

102 key : `~typing.TypeVar` 

103 The type which has been tracked in the `_DatasetTracker`. 

104 """ 

105 return self._consumers.get(key, set()) 

106 

107 def getProducer(self, key: DatasetTypeName) -> TaskDef | None: 

108 """Return the value associated with the consumption of the supplied 

109 key. 

110 

111 Parameters 

112 ---------- 

113 key : `~typing.TypeVar` 

114 The type which has been tracked in the `_DatasetTracker`. 

115 """ 

116 # This tracker may have had all nodes associated with a key removed 

117 # and if there are no refs (empty set) should return None 

118 return producer if (producer := self._producers.get(key)) else None 

119 

120 def getAll(self, key: DatasetTypeName) -> set[TaskDef]: 

121 """Return all consumers and the producer associated with the the 

122 supplied key. 

123 

124 Parameters 

125 ---------- 

126 key : `~typing.TypeVar` 

127 The type which has been tracked in the `_DatasetTracker`. 

128 """ 

129 return self.getConsumers(key).union(x for x in (self.getProducer(key),) if x is not None) 

130 

131 @property 

132 def inverse(self) -> defaultdict[TaskDef, set[DatasetTypeName]] | None: 

133 """Return the inverse mapping if class was instantiated to create an 

134 inverse, else return None. 

135 """ 

136 return self._itemsDict if self._createInverse else None 

137 

138 def makeNetworkXGraph(self) -> nx.DiGraph: 

139 """Create a NetworkX graph out of all the contained keys, using the 

140 relations of producer and consumers to create the edges. 

141 

142 Returns 

143 ------- 

144 graph : `networkx.DiGraph` 

145 The graph created out of the supplied keys and their relations. 

146 """ 

147 graph = nx.DiGraph() 

148 for entry in self._producers.keys() | self._consumers.keys(): 

149 producer = self.getProducer(entry) 

150 consumers = self.getConsumers(entry) 

151 # This block is for tasks that consume existing inputs 

152 if producer is None and consumers: 

153 for consumer in consumers: 

154 graph.add_node(consumer) 

155 # This block is for tasks that produce output that is not consumed 

156 # in this graph 

157 elif producer is not None and not consumers: 

158 graph.add_node(producer) 

159 # all other connections 

160 else: 

161 for consumer in consumers: 

162 graph.add_edge(producer, consumer) 

163 return graph 

164 

165 def keys(self) -> set[DatasetTypeName]: 

166 """Return all tracked keys.""" 

167 return self._producers.keys() | self._consumers.keys() 

168 

169 def __contains__(self, key: DatasetTypeName) -> bool: 

170 """Check if a key is in the `_DatasetTracker`. 

171 

172 Parameters 

173 ---------- 

174 key : `~typing.TypeVar` 

175 The key to check. 

176 

177 Returns 

178 ------- 

179 contains : `bool` 

180 Boolean of the presence of the supplied key. 

181 """ 

182 return key in self._producers or key in self._consumers