Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23"""Module defining quantum graph classes and related methods. 

24 

25There could be different representations of the quantum graph depending 

26on the client needs. Presently this module contains graph implementation 

27which is based on requirements of command-line environment. In the future 

28we could add other implementations and methods to convert between those 

29representations. 

30""" 

31 

32# "exported" names 

33__all__ = ["QuantumGraph", "QuantumGraphTaskNodes", "QuantumIterData"] 

34 

35# ------------------------------- 

36# Imports of standard modules -- 

37# ------------------------------- 

38from itertools import chain 

39from dataclasses import dataclass 

40from typing import List, FrozenSet, Mapping 

41 

42# ----------------------------- 

43# Imports for other modules -- 

44# ----------------------------- 

45from .pipeline import TaskDef 

46from .pipeTools import orderPipeline 

47from lsst.daf.butler import DatasetRef, DatasetType, NamedKeyDict, Quantum 

48 

49# ---------------------------------- 

50# Local non-exported definitions -- 

51# ---------------------------------- 

52 

53# ------------------------ 

54# Exported definitions -- 

55# ------------------------ 

56 

57 

@dataclass
class QuantumIterData:
    """Helper class for iterating over quanta in a graph.

    The `QuantumGraph.traverse` method needs to return topologically ordered
    Quanta together with their dependencies. This class is used as the value
    produced by that iterator; it holds one enumerated Quantum together with
    the indices of the Quanta it depends on.
    """

    # None of the fields below has a default value, which is what allows the
    # explicit ``__slots__`` declaration to coexist with ``@dataclass``.
    __slots__ = ["index", "quantum", "taskDef", "dependencies"]

    index: int
    """Index of this Quantum, a unique but arbitrary integer."""

    quantum: Quantum
    """Quantum corresponding to a graph node."""

    taskDef: TaskDef
    """Task class to be run on this quantum, and corresponding label and
    config.
    """

    # Generic aliases are parametrized by subscription; the previous
    # ``FrozenSet(int)`` spelling was a call expression that fails as soon as
    # the (lazily stored) annotation is evaluated.
    dependencies: FrozenSet[int]
    """Possibly empty set of indices of dependencies for this Quantum.
    Dependencies include other nodes in the graph; they do not reflect data
    already in butler (there are no graph nodes for those).
    """

85 

86 

@dataclass
class QuantumGraphTaskNodes:
    """All nodes of a quantum graph that belong to one task.

    A node of a quantum graph pairs a `PipelineTask` with a single
    `~lsst.daf.butler.Quantum` instance. The graph can be stored as a plain
    list of nodes with no explicit edges, because edges can be deduced from
    the inputs and outputs of the nodes' quanta when needed. Such a list
    reduces to a list of PipelineTasks (or their corresponding TaskDefs)
    plus, for each of them, a list of Quanta.

    This class is that reduced form for a single task; a full `QuantumGraph`
    is a sequence of instances of this class, one per task.

    Other frameworks may prefer a different graph representation. This one
    mostly follows the needs of the command-line executor, which does not
    require explicit edge information.
    """

    taskDef: TaskDef
    """Task definition shared by every quantum in this set of nodes."""

    quanta: List[Quantum]
    """Quanta to be processed by the task."""

    initInputs: Mapping[DatasetType, DatasetRef]
    """Datasets that must be loaded or created to construct this task."""

    initOutputs: Mapping[DatasetType, DatasetRef]
    """Datasets that may be written after constructing this task."""

117 

118 

class QuantumGraph(list):
    """A list of `QuantumGraphTaskNodes` objects, one per task.

    The order of the entries normally follows the order of tasks in the
    pipeline, but that depends entirely on the code that builds the graph.

    Parameters
    ----------
    iterable : iterable of `QuantumGraphTaskNodes`, optional
        Initial sequence of per-task nodes.
    """

    initInputs: NamedKeyDict
    """Datasets that must be provided to construct one or more Tasks in this
    graph, and must be obtained from the data repository.

    This is disjoint with both `initIntermediates` and `initOutputs`.
    """

    initIntermediates: NamedKeyDict
    """Datasets that must be provided to construct one or more Tasks in this
    graph, but are also produced after constructing a Task in this graph.

    This is disjoint with both `initInputs` and `initOutputs`.
    """

    initOutputs: NamedKeyDict
    """Datasets that are produced after constructing a Task in this graph,
    and are not used to construct any other Task in this graph.

    This is disjoint from both `initInputs` and `initIntermediates`.
    """

    def __init__(self, iterable=None):
        # A missing (or otherwise falsy) argument yields an empty graph.
        initialNodes = iterable or []
        list.__init__(self, initialNodes)
        self.initInputs = NamedKeyDict()
        self.initIntermediates = NamedKeyDict()
        self.initOutputs = NamedKeyDict()

    def quanta(self):
        """Iterate over every quantum in the graph.

        No particular ordering of the quanta is promised.

        Yields
        ------
        taskDef : `TaskDef`
            Task definition for a Quantum.
        quantum : `~lsst.daf.butler.Quantum`
            Single quantum.
        """
        for perTask in self:
            definition = perTask.taskDef
            yield from ((definition, single) for single in perTask.quanta)

    def quantaAsQgraph(self):
        """Iterate over quanta, wrapping each one in its own graph.

        Every yielded graph holds exactly one `QuantumGraphTaskNodes` entry
        which in turn holds exactly one quantum.

        Yields
        ------
        graph : `QuantumGraph`
        """
        for definition, single in self.quanta():
            # NOTE(review): ``quantum.outputs`` is what gets stored as the
            # node's ``initOutputs`` here -- confirm that this is intended.
            wrapped = QuantumGraphTaskNodes(taskDef=definition,
                                            quanta=[single],
                                            initInputs=single.initInputs,
                                            initOutputs=single.outputs)
            yield QuantumGraph([wrapped])

    def countQuanta(self):
        """Return total count of quanta in a graph.

        Returns
        -------
        count : `int`
            Number of quanta in a graph.
        """
        total = 0
        for perTask in self:
            total += len(perTask.quanta)
        return total

    def traverse(self):
        """Return topologically ordered Quanta and their dependencies.

        Quanta are visited in topological order and numbered as they are
        visited. Each returned `QuantumIterData` carries the Quantum, its
        ``index``, and the ``index`` of every prerequisite (Quanta that
        produce inputs for this Quantum):

        - the ``index`` values are generated by an iteration of a
          QuantumGraph, and are not intrinsic to the QuantumGraph
        - during iteration, each ID will appear in index before it ever
          appears in dependencies.

        Yields
        ------
        quantumData : `QuantumIterData`

        Raises
        ------
        KeyError
            Raised when a not-yet-existing input is not produced by any
            earlier Quantum, unless the graph consists of exactly one
            Quantum.
        """
        # Tasks in a graph are probably topologically sorted already but
        # there is no guarantee for that. Re-construct the pipeline and let
        # the existing ordering method sort it; the per-task nodes are then
        # looked up again through the identity of their TaskDef.
        nodesByTaskId = {id(perTask.taskDef): perTask for perTask in self}
        orderedTaskDefs = orderPipeline([perTask.taskDef for perTask in self])

        nextIndex = 0
        producerIndex = {}  # maps (DatasetType.name, dataId) to its producing quantum index
        for orderedDef in orderedTaskDefs:
            perTask = nodesByTaskId[id(orderedDef)]
            for single in perTask.quanta:

                # Collect dependencies; their producers must already be
                # registered in `producerIndex`.
                needed = set()
                for inputRef in chain.from_iterable(single.predictedInputs.values()):
                    # if data exists in butler then `id` is not None
                    if inputRef.id is not None:
                        continue
                    # Use the base name if this is a component
                    baseName, _ = inputRef.datasetType.nameAndComponent()
                    try:
                        producer = producerIndex[(baseName, inputRef.dataId)]
                    except KeyError:
                        # The Quantum that makes our inputs is not in the
                        # graph, this could happen if we run on a "split
                        # graph" which is usually just one quantum. Only a
                        # graph with exactly one Quantum is let through.
                        # TODO: This code has to be removed or replaced with
                        # something more generic
                        if len(self) != 1 or len(self[0].quanta) != 1:
                            raise
                    else:
                        needed.add(producer)

                # Register this quantum as the producer of all its outputs
                producerIndex.update(
                    ((outputRef.datasetType.name, outputRef.dataId), nextIndex)
                    for outputRef in chain.from_iterable(single.outputs.values())
                )

                yield QuantumIterData(index=nextIndex, quantum=single, taskDef=perTask.taskDef,
                                      dependencies=frozenset(needed))
                nextIndex += 1