Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

# This file is part of pipe_base. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

 

"""Module defining quantum graph classes and related methods. 

 

There can be different representations of the quantum graph depending
on the client's needs. Presently this module contains a graph implementation
which is based on the requirements of the command-line environment. In the
future we could add other implementations and methods to convert between
those representations.

""" 

 

# "exported" names 

# Names made available by ``from <this module> import *``; all three are
# classes defined further down in this file.
__all__ = ["QuantumGraph", "QuantumGraphTaskNodes", "QuantumIterData"]

 

# ------------------------------- 

# Imports of standard modules -- 

# ------------------------------- 

from itertools import chain 

 

# ----------------------------- 

# Imports for other modules -- 

# ----------------------------- 

from .pipeline import Pipeline 

from .pipeTools import orderPipeline 

from lsst.daf.butler import DataId 

 

# ---------------------------------- 

# Local non-exported definitions -- 

# ---------------------------------- 

 

# ------------------------ 

# Exported definitions -- 

# ------------------------ 

 

 

class QuantumIterData:
    """Value type produced while iterating over the quanta of a graph.

    `QuantumGraph.traverse` has to hand back Quanta in topological order
    along with the Quanta each of them depends on. Every item produced by
    that iteration is an instance of this class: one enumerated Quantum
    bundled with the indices of its dependencies.

    Parameters
    ----------
    quantumId : `int`
        Index of this Quantum, unique but arbitrary integer.
    quantum : `~lsst.daf.butler.Quantum`
        Quantum corresponding to a graph node.
    taskDef : `TaskDef`
        Task to be run on this quantum.
    dependencies : iterable of `int`
        Possibly empty collection of indices of the Quanta this Quantum
        depends on; it is stored as a `frozenset`. Only other nodes of the
        graph are counted as prerequisites; data already in butler is not
        reflected here (there are no graph nodes for it).
    """

    __slots__ = ["quantumId", "quantum", "taskDef", "dependencies"]

    def __init__(self, quantumId, quantum, taskDef, dependencies):
        self.quantumId = quantumId
        self.quantum = quantum
        self.taskDef = taskDef
        # Freeze the dependencies: duplicates collapse and the result is
        # immutable.
        self.dependencies = frozenset(dependencies)

    def __str__(self):
        # The quantum itself is deliberately left out of the summary, only
        # its index, task and dependency indices are shown.
        return f"QuantumIterData({self.quantumId}, {self.taskDef}, {self.dependencies})"

 

 

class QuantumGraphTaskNodes:
    """Group of quantum graph nodes that all belong to a single task.

    A node of a quantum graph is represented by a `PipelineTask` together
    with one `~lsst.daf.butler.Quantum` instance. One possible
    representation of the graph is a plain list of nodes without edges
    (edges can be deduced from the nodes' quantum inputs and outputs if
    needed). Such a list can be reduced further to the list of PipelineTasks
    (or their corresponding TaskDefs) and the matching list of Quanta. This
    class holds that reduced representation for one task, and a full
    `QuantumGraph` is a sequence of instances of this class for one or more
    tasks.

    Different frameworks may use a different graph representation; this one
    was based mostly on the requirements of the command-line executor, which
    does not need explicit edge information.

    Attributes
    ----------
    taskDef : `TaskDef`
        Task definition for this set of nodes.
    quanta : `list` of `~lsst.daf.butler.Quantum`
        List of quanta corresponding to the task.
    """

    def __init__(self, taskDef, quanta):
        # Both arguments are stored as given (no copy is made).
        self.taskDef, self.quanta = taskDef, quanta

 

 

class QuantumGraph(list):
    """Sequence of `QuantumGraphTaskNodes` objects forming a quantum graph.

    The tasks in the list will typically appear in the same order as the
    tasks of the pipeline, although that depends entirely on the code that
    builds the graph.

    Parameters
    ----------
    iterable : iterable of `QuantumGraphTaskNodes`, optional
        Initial sequence of per-task nodes.
    """

    def __init__(self, iterable=None):
        # A missing (or empty) iterable produces an empty graph.
        super().__init__(iterable if iterable else [])
        self.initInputs = []
        self.initOutputs = []
        self._inputDatasetTypes = set()
        self._outputDatasetTypes = set()

    def quanta(self):
        """Iterate over all quanta of the graph.

        No particular ordering of the returned quanta is promised.

        Yields
        ------
        taskDef : `TaskDef`
            Task definition for a Quantum.
        quantum : `~lsst.daf.butler.Quantum`
            Single quantum.
        """
        for perTask in self:
            taskDef = perTask.taskDef
            yield from ((taskDef, quantum) for quantum in perTask.quanta)

    def traverse(self):
        """Iterate over Quanta in topological order, with dependencies.

        All Quanta are visited in topological order and numbered as they
        are visited. Each returned `QuantumIterData` object carries the
        Quantum instance, its ``quantumId`` and the ``quantumId`` of all
        its prerequisites (Quanta that produce inputs for this Quantum):

        - the ``quantumId`` values are generated by an iteration of a
          QuantumGraph, and are not intrinsic to the QuantumGraph
        - during iteration, each ID will appear in quantumId before it ever
          appears in dependencies.

        Yields
        ------
        quantumData : `QuantumIterData`

        Raises
        ------
        KeyError
            Raised if an input that has no ``id`` is not produced by any
            previously visited Quantum, unless the graph holds exactly one
            task node with exactly one Quantum.
        """

        def orderedTaskNodes(graph):
            """Iterate over task nodes in topological order.

            Yields
            ------
            nodes : `QuantumGraphTaskNodes`
            """
            # The graph is likely sorted topologically already, but nothing
            # guarantees it. Rebuild a Pipeline from the task definitions
            # and let the existing ordering method sort it; nodes are then
            # looked up by the identity of their task definition.
            byTaskDefId = {id(nodes.taskDef): nodes for nodes in graph}
            sortedPipeline = orderPipeline(Pipeline(nodes.taskDef for nodes in graph))
            for taskDef in sortedPipeline:
                yield byTaskDefId[id(taskDef)]

        def orderedQuanta(graph):
            """Flatten ordered task nodes into (taskDef, quantum) pairs."""
            for nodes in orderedTaskNodes(graph):
                for quantum in nodes.quanta:
                    yield nodes.taskDef, quantum

        def refKey(dataRef):
            """Build the lookup key (dataset type name, DataId) of a ref."""
            return dataRef.datasetType.name, DataId(dataRef.dataId)

        # Maps (DatasetType.name, DataId) to the index of the quantum that
        # produces it.
        producers = {}
        for quantumId, (taskDef, quantum) in enumerate(orderedQuanta(self)):

            # Collect dependencies; their producers must already be
            # registered in `producers`.
            dependencies = []
            for inputRef in chain.from_iterable(quantum.predictedInputs.values()):
                # if data exists in butler then `id` is not None, and no
                # graph node produces it
                if inputRef.id is not None:
                    continue
                key = refKey(inputRef)
                try:
                    producerId = producers[key]
                except KeyError:
                    # The Quantum that makes our inputs is not in the graph,
                    # this could happen if we run on a "split graph" which
                    # is usually just one quantum. Tolerate the miss only
                    # when the graph holds a single Quantum.
                    # TODO: This code has to be removed or replaced with
                    # something more generic
                    if len(self) != 1 or len(self[0].quanta) != 1:
                        raise
                else:
                    dependencies.append(producerId)

            # Register this quantum as the producer of all its outputs.
            for outputRef in chain.from_iterable(quantum.outputs.values()):
                producers[refKey(outputRef)] = quantumId

            yield QuantumIterData(quantumId, quantum, taskDef, dependencies)

    def getDatasetTypes(self, initInputs=True, initOutputs=True, inputs=True, outputs=True):
        """Collect dataset types of the selected categories into one set.

        Parameters
        ----------
        initInputs : `bool`, optional
            If true, include the ``datasetType`` of every item in
            ``self.initInputs``.
        initOutputs : `bool`, optional
            If true, include the ``datasetType`` of every item in
            ``self.initOutputs``.
        inputs : `bool`, optional
            If true, include everything in ``self._inputDatasetTypes``.
        outputs : `bool`, optional
            If true, include everything in ``self._outputDatasetTypes``.

        Returns
        -------
        total : `set`
            Newly created set with the union of all selected categories.
        """
        total = set()
        if initInputs:
            total.update(dsRef.datasetType for dsRef in self.initInputs)
        if initOutputs:
            total.update(dsRef.datasetType for dsRef in self.initOutputs)
        if inputs:
            total.update(self._inputDatasetTypes)
        if outputs:
            total.update(self._outputDatasetTypes)
        return total