lsst.pipe.base  19.0.0-22-g8325f40
graph.py
Go to the documentation of this file.
1 # This file is part of pipe_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Module defining quantum graph classes and related methods.

There could be different representations of the quantum graph depending
on the client needs. Presently this module contains graph implementation
which is based on requirements of command-line environment. In the future
we could add other implementations and methods to convert between those
representations.
"""

# The docstring has to be the first statement of the module to be picked up
# as ``__doc__``; a future statement is still legal directly after it.
from __future__ import annotations
31 
32 # "exported" names
33 __all__ = ["QuantumGraph", "QuantumGraphTaskNodes", "QuantumIterData"]
34 
35 # -------------------------------
36 # Imports of standard modules --
37 # -------------------------------
38 from itertools import chain
39 from dataclasses import dataclass
40 from typing import List, FrozenSet, Mapping
41 
42 # -----------------------------
43 # Imports for other modules --
44 # -----------------------------
45 from .pipeline import TaskDef
46 from .pipeTools import orderPipeline
47 from lsst.daf.butler import DatasetRef, DatasetType, NamedKeyDict, Quantum
48 
49 # ----------------------------------
50 # Local non-exported definitions --
51 # ----------------------------------
52 
53 # ------------------------
54 # Exported definitions --
55 # ------------------------
56 
57 
@dataclass
class QuantumIterData:
    """Helper class for iterating over quanta in a graph.

    The `QuantumGraph.traverse` method needs to return topologically ordered
    Quanta together with their dependencies. This class is used as a value
    for the iterator, it contains enumerated Quantum and its dependencies.
    """

    # Slots are declared by hand, so none of the fields below may be given a
    # class-level default value.
    __slots__ = ["index", "quantum", "taskDef", "dependencies"]

    index: int
    """Index of this Quantum, a unique but arbitrary integer."""

    quantum: Quantum
    """Quantum corresponding to a graph node."""

    taskDef: TaskDef
    """Task class to be run on this quantum, and corresponding label and
    config.
    """

    # Generic aliases are parametrized with square brackets; the call form
    # ``FrozenSet(int)`` cannot be resolved by `typing.get_type_hints`.
    dependencies: FrozenSet[int]
    """Possibly empty set of indices of dependencies for this Quantum.
    Dependencies include other nodes in the graph; they do not reflect data
    already in butler (there are no graph nodes for those).
    """
85 
86 
@dataclass
class QuantumGraphTaskNodes:
    """QuantumGraphTaskNodes represents a bunch of nodes in a quantum graph
    corresponding to a single task.

    The node in quantum graph is represented by the `PipelineTask` and a
    single `~lsst.daf.butler.Quantum` instance. One possible representation
    of the graph is just a list of nodes without edges (edges can be deduced
    from nodes' quantum inputs and outputs if needed). That representation can
    be reduced to the list of PipelineTasks (or their corresponding TaskDefs)
    and the corresponding list of Quanta. This class is used in this reduced
    representation for a single task, and full `QuantumGraph` is a sequence of
    instances of this class for one or more tasks.

    Different frameworks may use different graph representation, this
    representation was based mostly on requirements of command-line
    executor which does not need explicit edges information.
    """

    taskDef: TaskDef
    """Task definition for this set of nodes."""

    quanta: List[Quantum]
    """List of quanta corresponding to the task."""

    initInputs: Mapping[DatasetType, DatasetRef]
    """Datasets that must be loaded or created to construct this task."""

    initOutputs: Mapping[DatasetType, DatasetRef]
    """Datasets that may be written after constructing this task."""
117 
118 
class QuantumGraph(list):
    """Sequence of `QuantumGraphTaskNodes` objects, one entry per task.

    The entries usually follow the order of the tasks in the pipeline, but
    that depends entirely on the code that builds the graph.

    Parameters
    ----------
    iterable : iterable of `QuantumGraphTaskNodes`, optional
        Initial sequence of per-task nodes.
    """

    def __init__(self, iterable=None):
        # A missing (or otherwise falsy) argument produces an empty graph.
        initialNodes = iterable or []
        list.__init__(self, initialNodes)
        self.initInputs = NamedKeyDict()
        self.initIntermediates = NamedKeyDict()
        self.initOutputs = NamedKeyDict()

    initInputs: NamedKeyDict
    """Datasets needed to construct one or more Tasks in this graph that
    have to be obtained from the data repository.

    This is disjoint with both `initIntermediates` and `initOutputs`.
    """

    initIntermediates: NamedKeyDict
    """Datasets needed to construct one or more Tasks in this graph that are
    themselves produced after constructing a Task in this graph.

    This is disjoint with both `initInputs` and `initOutputs`.
    """

    initOutputs: NamedKeyDict
    """Datasets produced after constructing a Task in this graph that are
    not used to construct any other Task in this graph.

    This is disjoint from both `initInputs` and `initIntermediates`.
    """

    def quanta(self):
        """Iterate over all quanta of the graph.

        Entries are visited in list order and, within one entry, in the order
        of its ``quanta`` list; callers should not rely on any ordering.

        Yields
        ------
        taskDef : `TaskDef`
            Task definition for a Quantum.
        quantum : `~lsst.daf.butler.Quantum`
            Single quantum.
        """
        for taskNodes in self:
            taskDef = taskNodes.taskDef
            yield from ((taskDef, quantum) for quantum in taskNodes.quanta)

    def quantaAsQgraph(self):
        """Iterate over quanta, wrapping each one in a graph of its own.

        Yields
        ------
        graph : `QuantumGraph`
            Graph holding exactly one task entry with exactly one quantum.
        """
        for taskDef, quantum in self.quanta():
            # NOTE(review): the quantum's ``outputs`` are passed in the
            # ``initOutputs`` position of the node - confirm this is intended.
            yield QuantumGraph([
                QuantumGraphTaskNodes(taskDef, [quantum],
                                      quantum.initInputs, quantum.outputs)
            ])

    def countQuanta(self):
        """Return total count of quanta in a graph.

        Returns
        -------
        count : `int`
            Number of quanta summed over all task entries.
        """
        count = 0
        for taskNodes in self:
            count += len(taskNodes.quanta)
        return count

    def traverse(self):
        """Return topologically ordered Quanta and their dependencies.

        Quanta are enumerated while they are visited in topological order.
        Every yielded `QuantumIterData` holds the Quantum, its ``index`` and
        the ``index`` values of its prerequisites (Quanta that produce inputs
        for this Quantum):

        - the ``index`` values are generated by an iteration of a
          QuantumGraph, and are not intrinsic to the QuantumGraph
        - during iteration, each ID will appear in index before it ever
          appears in dependencies.

        Yields
        ------
        quantumData : `QuantumIterData`

        Raises
        ------
        KeyError
            Raised when the producer of a not-yet-existing input is not part
            of the graph, unless the graph consists of a single quantum.
        """

        def sortedTaskNodes(graph):
            """Yield the task entries of ``graph`` in topological order.

            Yields
            ------
            nodes : `QuantumGraphTaskNodes`
            """
            # The stored order is probably topological already, but nothing
            # guarantees it, so the existing pipeline-ordering helper is
            # reused. Entries are found again through the identity of their
            # task definition.
            entryByTaskId = {id(entry.taskDef): entry for entry in graph}
            orderedTaskDefs = orderPipeline([entry.taskDef for entry in graph])
            for taskDef in orderedTaskDefs:
                yield entryByTaskId[id(taskDef)]

        def flattened():
            """Yield ``(nodes, quantum)`` pairs in traversal order."""
            for taskNodes in sortedTaskNodes(self):
                for quantum in taskNodes.quanta:
                    yield taskNodes, quantum

        # Maps (DatasetType.name, dataId) to the index of the producing quantum
        producers = {}
        for index, (nodes, quantum) in enumerate(flattened()):

            # Collect dependencies; their producers were registered earlier
            dependencies = []
            for inputRef in chain.from_iterable(quantum.predictedInputs.values()):
                # A non-None `id` means the data exists in butler already
                if inputRef.id is not None:
                    continue
                # Components are looked up under the name of their parent
                baseName, _ = inputRef.datasetType.nameAndComponent()
                key = (baseName, inputRef.dataId)
                try:
                    dependencies.append(producers[key])
                except KeyError:
                    # The producing Quantum is not in the graph. That can
                    # happen for a "split graph", which is usually just one
                    # quantum, so the error is ignored only in that case.
                    # TODO: This code has to be removed or replaced with
                    # something more generic
                    if len(self) != 1 or len(self[0].quanta) != 1:
                        raise

            # Register this quantum as the producer of all its outputs
            for outputRef in chain.from_iterable(quantum.outputs.values()):
                producers[(outputRef.datasetType.name, outputRef.dataId)] = index

            yield QuantumIterData(index=index, quantum=quantum, taskDef=nodes.taskDef,
                                  dependencies=frozenset(dependencies))
lsst::pipe::base.graph.QuantumGraph.initOutputs
initOutputs
Definition: graph.py:135
lsst::pipe::base.graph.QuantumGraph
Definition: graph.py:119
lsst::pipe::base.graph.QuantumGraphTaskNodes
Definition: graph.py:88
lsst::pipe::base.graph.QuantumGraph.traverse
def traverse(self)
Definition: graph.py:200
lsst::pipe::base.graph.QuantumGraph.initInputs
initInputs
Definition: graph.py:133
lsst::pipe::base.graph.QuantumGraph.countQuanta
def countQuanta(self)
Definition: graph.py:190
lsst::pipe::base.graph.QuantumGraph.__init__
def __init__(self, iterable=None)
Definition: graph.py:131
lsst::pipe::base.graph.QuantumGraph.initIntermediates
initIntermediates
Definition: graph.py:134
lsst::pipe::base.pipeTools.orderPipeline
def orderPipeline(pipeline)
Definition: pipeTools.py:135
lsst::pipe::base.graph.QuantumGraph.quanta
def quanta(self)
Definition: graph.py:158
lsst::pipe::base.graph.QuantumGraph.quantaAsQgraph
def quantaAsQgraph(self)
Definition: graph.py:175
lsst::pipe::base.graph.QuantumIterData
Definition: graph.py:59