Coverage for python/lsst/pipe/base/graph.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23"""Module defining quantum graph classes and related methods.
25There could be different representations of the quantum graph depending
26on the client needs. Presently this module contains graph implementation
27which is based on requirements of command-line environment. In the future
28we could add other implementations and methods to convert between those
29representations.
30"""
32# "exported" names
33__all__ = ["QuantumGraph", "QuantumGraphTaskNodes", "QuantumIterData"]
35# -------------------------------
36# Imports of standard modules --
37# -------------------------------
38from itertools import chain
39from dataclasses import dataclass
40from typing import List, FrozenSet, Mapping
42# -----------------------------
43# Imports for other modules --
44# -----------------------------
45from .pipeline import TaskDef
46from .pipeTools import orderPipeline
47from lsst.daf.butler import Quantum, DatasetRef, DatasetType
48from lsst.daf.butler.core.utils import NamedKeyDict
50# ----------------------------------
51# Local non-exported definitions --
52# ----------------------------------
54# ------------------------
55# Exported definitions --
56# ------------------------
@dataclass
class QuantumIterData:
    """Value type produced while iterating over quanta in a graph.

    `QuantumGraph.traverse` yields quanta in topological order together with
    their dependencies; each yielded item is an instance of this class,
    holding one enumerated Quantum and the indices of the quanta it depends
    on.
    """

    # No field declares a default value, so the slot names do not collide
    # with class-level attributes and ``__slots__`` can be combined with
    # ``@dataclass``.
    __slots__ = ["index", "quantum", "taskDef", "dependencies"]

    index: int
    """Index of this Quantum, a unique but arbitrary integer."""

    quantum: Quantum
    """Quantum corresponding to a graph node."""

    taskDef: TaskDef
    """Task class to be run on this quantum, and corresponding label and
    config.
    """

    # Generic aliases are subscripted, not called: ``FrozenSet(int)`` raises
    # `TypeError` as soon as the annotation is actually evaluated.
    dependencies: FrozenSet[int]
    """Possibly empty set of indices of dependencies for this Quantum.
    Dependencies include other nodes in the graph; they do not reflect data
    already in butler (there are no graph nodes for those).
    """
@dataclass
class QuantumGraphTaskNodes:
    """All nodes of a quantum graph that belong to one task.

    A quantum graph node is a `PipelineTask` paired with a single
    `~lsst.daf.butler.Quantum` instance. The graph can be stored as a plain
    list of such nodes with no explicit edges, because edges can be deduced
    from the inputs and outputs of each node's quantum when they are needed.
    That list in turn reduces to a list of PipelineTasks (or their
    corresponding TaskDefs), each with its own list of Quanta. This class
    holds that reduced form for one task; a full `QuantumGraph` is a
    sequence of instances of this class for one or more tasks.

    Other frameworks may prefer a different graph representation; this one
    follows mostly from the requirements of the command-line executor, which
    does not need explicit edge information.
    """

    taskDef: TaskDef
    """Task definition shared by this set of nodes."""

    quanta: List[Quantum]
    """Quanta that belong to the task, as a list."""

    initInputs: Mapping[DatasetType, DatasetRef]
    """Datasets that must be loaded or created to construct this task."""

    initOutputs: Mapping[DatasetType, DatasetRef]
    """Datasets that may be written after constructing this task."""
class QuantumGraph(list):
    """Sequence of `QuantumGraphTaskNodes` objects, one entry per task.

    The entries normally follow the order of the tasks in the pipeline,
    although that ultimately depends on the code that builds the graph.

    Parameters
    ----------
    iterable : iterable of `QuantumGraphTaskNodes`, optional
        Initial sequence of per-task nodes.
    """

    def __init__(self, iterable=None):
        # A missing (or falsy) ``iterable`` produces an empty graph.
        super().__init__(iterable or [])
        self.initInputs = NamedKeyDict()
        self.initIntermediates = NamedKeyDict()
        self.initOutputs = NamedKeyDict()

    initInputs: NamedKeyDict
    """Datasets that must be provided to construct one or more Tasks in this
    graph, and must be obtained from the data repository.

    This is disjoint with both `initIntermediates` and `initOutputs`.
    """

    initIntermediates: NamedKeyDict
    """Datasets that must be provided to construct one or more Tasks in this
    graph, but are also produced after constructing a Task in this graph.

    This is disjoint with both `initInputs` and `initOutputs`.
    """

    initOutputs: NamedKeyDict
    """Datasets that are produced after constructing a Task in this graph,
    and are not used to construct any other Task in this graph.

    This is disjoint from both `initInputs` and `initIntermediates`.
    """

    def quanta(self):
        """Iterate over every quantum in the graph.

        No particular ordering of the quanta is promised.

        Yields
        ------
        taskDef : `TaskDef`
            Task definition for a Quantum.
        quantum : `~lsst.daf.butler.Quantum`
            Single quantum.
        """
        for nodes in self:
            task = nodes.taskDef
            yield from ((task, item) for item in nodes.quanta)

    def quantaAsQgraph(self):
        """Iterate over the quanta, wrapping each in its own graph.

        Every yielded `QuantumGraph` contains exactly one quantum.

        Yields
        ------
        graph : `QuantumGraph`
        """
        for taskDef, quantum in self.quanta():
            # Positional arguments fill taskDef, quanta, initInputs and
            # initOutputs, in that order.
            # NOTE(review): ``quantum.outputs`` lands in the ``initOutputs``
            # position -- confirm that this is intended.
            yield QuantumGraph([
                QuantumGraphTaskNodes(taskDef, [quantum],
                                      quantum.initInputs, quantum.outputs)
            ])

    def countQuanta(self):
        """Return total count of quanta in a graph.

        Returns
        -------
        count : `int`
            Number of quanta in a graph.
        """
        total = 0
        for nodes in self:
            total += len(nodes.quanta)
        return total

    def traverse(self):
        """Yield Quanta in topological order along with their dependencies.

        Quanta are enumerated as they are visited. Every yielded
        `QuantumIterData` carries the Quantum, its ``index`` and the
        ``index`` of each of its prerequisites (Quanta that produce inputs
        for this Quantum):

        - the ``index`` values are generated by an iteration of a
          QuantumGraph, and are not intrinsic to the QuantumGraph
        - during iteration, each ID will appear in index before it ever
          appears in dependencies.

        Yields
        ------
        quantumData : `QuantumIterData`
        """

        def orderedTaskNodes(graph):
            """Return topologically ordered task nodes.

            Yields
            ------
            nodes : `QuantumGraphTaskNodes`
            """
            # The stored order is probably topological already, but nothing
            # guarantees it, so hand the task definitions to the existing
            # pipeline-ordering helper and map its result back to the nodes.
            nodesByTaskId = {}
            taskDefs = []
            for item in graph:
                nodesByTaskId[id(item.taskDef)] = item
                taskDefs.append(item.taskDef)
            for taskDef in orderPipeline(taskDefs):
                yield nodesByTaskId[id(taskDef)]

        # Maps (DatasetType.name, dataId) to the index of the producing
        # quantum.
        producers = {}
        flattened = ((nodes, quantum)
                     for nodes in orderedTaskNodes(self)
                     for quantum in nodes.quanta)
        for index, (nodes, quantum) in enumerate(flattened):

            # Collect dependencies; their producers must already have been
            # visited and recorded in ``producers``.
            upstream = set()
            for inputRef in chain.from_iterable(quantum.predictedInputs.values()):
                # A non-None ``id`` means the data already exists in butler,
                # so no graph node produces it.
                if inputRef.id is not None:
                    continue
                # Components are looked up under their parent dataset name.
                baseName, _component = inputRef.datasetType.nameAndComponent()
                try:
                    producer = producers[(baseName, inputRef.dataId)]
                except KeyError:
                    # The Quantum that makes this input is not in the graph.
                    # That can happen when running on a "split graph", which
                    # is usually just one quantum, so the error is ignored
                    # only when the graph holds exactly one Quantum.
                    # TODO: This code has to be removed or replaced with
                    # something more generic
                    if len(self) != 1 or len(self[0].quanta) != 1:
                        raise
                else:
                    upstream.add(producer)

            # Record this quantum as the producer of all of its outputs.
            for outputRef in chain.from_iterable(quantum.outputs.values()):
                producers[(outputRef.datasetType.name, outputRef.dataId)] = index

            yield QuantumIterData(index=index, quantum=quantum, taskDef=nodes.taskDef,
                                  dependencies=frozenset(upstream))