21 from __future__
import annotations
23 __all__ = (
"QuantumGraph",
"IncompatibleGraphError")
25 from collections
import defaultdict
27 from itertools
import chain, count
30 from networkx.drawing.nx_agraph
import write_dot
34 from typing
import (DefaultDict, Dict, FrozenSet, Iterable, List, Mapping, Set, Generator, Optional, Tuple,
37 from ..connections
import iterConnections
38 from ..pipeline
import TaskDef
39 from lsst.daf.butler
import Quantum, DatasetRef
41 from ._implDetails
import _DatasetTracker, DatasetTypeName
42 from .quantumNode
import QuantumNode, NodeId, BuildId
44 _T = TypeVar(
"_T", bound=
"QuantumGraph")
48 """Exception class to indicate that a lookup by NodeId is impossible due
55 """QuantumGraph is a directed acyclic graph of `QuantumNode`s
57 This data structure represents a concrete workflow generated from a
62 quanta : Mapping of `TaskDef` to sets of `Quantum`
63 This maps tasks (and their configs) to the sets of data they are to
66 def __init__(self, quanta: Mapping[TaskDef, Set[Quantum]]):
69 def _buildGraphs(self,
70 quanta: Mapping[TaskDef, Set[Quantum]],
72 _quantumToNodeId: Optional[Mapping[Quantum, NodeId]] =
None,
73 _buildId: Optional[BuildId] =
None):
74 """Builds the graph that is used to store the relation between tasks,
75 and the graph that holds the relations between quanta
78 self.
_buildId = _buildId
if _buildId
is not None else BuildId(f
"{time.time()}-{os.getpid()}")
82 self.
_datasetDict = _DatasetTracker[DatasetTypeName, TaskDef]()
85 nodeNumberGenerator = count()
86 self._nodeIdMap: Dict[NodeId, QuantumNode] = {}
88 for taskDef, quantumSet
in self.
_quanta.items():
89 connections = taskDef.connections
94 for inpt
in iterConnections(connections, (
"inputs",
"prerequisiteInputs",
"initInputs")):
104 self.
_count += len(quantumSet)
105 for quantum
in quantumSet:
107 nodeId = _quantumToNodeId.get(quantum)
109 raise ValueError(
"If _quantuMToNodeNumber is not None, all quanta must have an "
110 "associated value in the mapping")
114 inits = quantum.initInputs.values()
115 inputs = quantum.inputs.values()
117 self._nodeIdMap[nodeId] = value
119 for dsRef
in chain(inits, inputs):
123 if isinstance(dsRef, Iterable):
128 for dsRef
in chain.from_iterable(quantum.outputs.values()):
139 """Return a graph representing the relations between the tasks inside
144 taskGraph : `networkx.Digraph`
145 Internal datastructure that holds relations of `TaskDef`s
151 """Return a graph representing the relations between all the
152 `QuantumNode`s. Largely it should be preferred to iterate over, and use
153 methods of this class, but sometimes direct access to the networkx
154 object may be helpful
158 graph : `networkx.Digraph`
159 Internal datastructure that holds relations of `QuantumNode`s
165 """Make a `list` of all `QuantumNode`s that are 'input' nodes to the
166 graph, meaning those nodes do not depend on any other nodes in the
171 inputNodes : iterable of `QuantumNode`
172 A list of nodes that are inputs to the graph
178 """Make a `list` of all `QuantumNode`s that are 'output' nodes to the
179 graph, meaning those nodes have no nodes that depend on them in the graph.
183 outputNodes : iterable of `QuantumNode`
184 A list of nodes that are outputs of the graph
190 """Return all the `DatasetTypeNames` that are contained inside the graph.
194 tuple of `DatasetTypeName`
195 All the data set type names that are present in the graph
201 """Return True if all of the nodes in the graph are connected, ignores
202 directionality of connections.
207 """Lookup a `QuantumNode` from an id associated with the node.
212 The number associated with a node
217 The node corresponding with input number
222 Raised if the requested nodeId is not in the graph.
223 IncompatibleGraphError
224 Raised if the nodeId was built with a graph other than
225 this instance (or a graph instance that produced this instance
226 through an operation such as subset)
230 return self._nodeIdMap[nodeId]
233 """Return all the `Quantum` associated with a `TaskDef`.
238 The `TaskDef` for which `Quantum` are to be queried
242 frozenset of `Quantum`
243 The `set` of `Quantum` that is associated with the specified
246 return frozenset(self.
_quanta[taskDef])
249 """Find all tasks that have the specified dataset type name as an
254 datasetTypeName : `str`
255 A string representing the name of a dataset type to be queried,
256 can also accept a `DatasetTypeName` which is a `NewType` of str for
257 type safety in static type checking.
261 tasks : iterable of `TaskDef`
262 `TaskDef`s that have the specified `DatasetTypeName` as an input, list
263 will be empty if no tasks use specified `DatasetTypeName` as an input.
268 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
270 return (c
for c
in self.
_datasetDict.getInputs(datasetTypeName))
273 """Find all tasks that have the specified dataset type name as an
278 datasetTypeName : `str`
279 A string representing the name of a dataset type to be queried,
280 can also accept a `DatasetTypeName` which is a `NewType` of str for
281 type safety in static type checking.
286 `TaskDef` that outputs `DatasetTypeName` as an output or None if none of
287 the tasks produce this `DatasetTypeName`.
292 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
297 """Find all tasks that are associated with the specified dataset type
302 datasetTypeName : `str`
303 A string representing the name of a dataset type to be queried,
304 can also accept a `DatasetTypeName` which is a `NewType` of str for
305 type safety in static type checking.
309 result : iterable of `TaskDef`
310 `TaskDef`s that are associated with the specified `DatasetTypeName`
315 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
319 if output
is not None:
320 results = chain(results, (output,))
324 """Determine which `TaskDef`s in this graph are associated with a `str`
325 representing a task name (looks at the taskName property of
328 Returns a list of `TaskDef`s as a `PipelineTask` may appear multiple
329 times in a graph with different labels.
334 Name of a task to search for
338 result : list of `TaskDef`
339 List of the `TaskDef`s that have the name specified. Multiple values
340 are returned in the case that a task is used multiple times with
344 for task
in self.
_quanta.keys():
345 split = task.taskName.split(
'.')
346 if split[-1] == taskName:
351 """Determine which `TaskDef`s in this graph are associated with a `str`
352 representing a task's label.
357 Name of a task to search for
362 `TaskDef`s that has the specified label.
364 for task
in self.
_quanta.keys():
365 if label == task.label:
370 """Return all the `Quantum` that contain a specified `DatasetTypeName`.
374 datasetTypeName : `str`
375 The name of the dataset type to search for as a string,
376 can also accept a `DatasetTypeName` which is a `NewType` of str for
377 type safety in static type checking.
381 result : `set` of `QuantumNode`s
382 A `set` of `QuantumNode`s that contain specified `DatasetTypeName`
387 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
391 result: Set[Quantum] = set()
392 result = result.union(*(self.
_quanta[task]
for task
in tasks))
396 """Check if specified quantum appears in the graph as part of a node.
401 The quantum to search for
406 The result of searching for the quantum
408 for qset
in self.
_quanta.values():
414 """Write out the graph as a dot graph.
418 output : str or `io.BufferedIOBase`
419 Either a filesystem path to write to, or a file handle object
423 def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T:
424 """Create a new graph object that contains the subset of the nodes
425 specified as input. Node number is preserved.
429 nodes : `QuantumNode` or iterable of `QuantumNode`
433 graph : instance of graph type
434 An instance of the type from which the subset was created
436 if not isinstance(nodes, Iterable):
439 quantumMap = defaultdict(set)
442 for node
in quantumSubgraph:
443 quantumMap[node.taskDef].add(node.quantum)
445 newInst = type(self)({})
446 newInst._buildGraphs(quantumMap, _quantumToNodeId={n.quantum: n.nodeId
for n
in nodes},
451 """Generate a list of subgraphs where each is connected.
455 result : list of `QuantumGraph`
456 A list of graphs that are each connected
458 return tuple(self.
subset(connectedSet)
462 """Return a set of `QuantumNode` that are direct inputs to a specified
468 The node of the graph for which inputs are to be determined
473 All the nodes that are direct inputs to specified node
478 """Return a set of `QuantumNode` that are direct outputs of a specified
484 The node of the graph for which outputs are to be determined
489 All the nodes that are direct outputs to specified node
494 """Return a graph of `QuantumNode` that are direct inputs and outputs
500 The node of the graph for which connected nodes are to be determined
504 graph : graph of `QuantumNode`
505 All the nodes that are directly connected to specified node
512 """Return a graph of the specified node and all the ancestor nodes
513 directly reachable by walking edges.
518 The node for which all ancestors are to be determined
522 graph of `QuantumNode`
523 Graph of node and all of its ancestors
526 predecessorNodes.add(node)
527 return self.
subset(predecessorNodes)
529 def findCycle(self) -> List[Tuple[QuantumNode, QuantumNode]]:
530 """Check a graph for the presence of cycles and returns the edges of
531 any cycles found, or an empty list if there is no cycle.
535 result : list of tuple of `QuantumNode`, `QuantumNode`
536 A list of any graph edges that form a cycle, or an empty list if
537 there is no cycle. An empty list is returned to support the
538 ``if graph.findCycle()`` syntax, as an empty list is falsy.
542 except nx.NetworkXNoCycle:
546 """Save QuantumGraph to a file.
547 Presently we store QuantumGraph in pickle format, this could
548 potentially change in the future if better format is found.
552 file : `io.BufferedIOBase`
553 File to write pickle data open in binary mode.
555 pickle.dump(self, file)
559 """Read QuantumGraph from a file that was made by `save`.
563 file : `io.BufferedIOBase`
564 File with pickle data open in binary mode.
565 universe: `~lsst.daf.butler.DimensionUniverse`
566 DimensionUniverse instance, not used by the method itself but
567 needed to ensure that registry data structures are initialized.
571 graph : `QuantumGraph`
572 Resulting QuantumGraph instance.
577 Raised if pickle contains instance of a type other than
581 Reading Quanta from pickle requires existence of singleton
582 DimensionUniverse which is usually instantiated during Registry
583 initialization. To make sure that DimensionUniverse exists this method
584 accepts dummy DimensionUniverse argument.
586 qgraph = pickle.load(file)
587 if not isinstance(qgraph, QuantumGraph):
588 raise TypeError(f
"QuantumGraph pickle file has contains unexpected object type: {type(qgraph)}")
592 """Iterate over the `taskGraph` attribute in topological order
597 `TaskDef` objects in topological order
599 yield from nx.topological_sort(self.
taskGraph)
601 def __iter__(self) -> Generator[QuantumNode, None, None]:
611 """Stores a compact form of the graph as a list of graph nodes, and a
612 tuple of task labels and task configs. The full graph can be
613 reconstructed with this information, and it preserves the ordering of
616 return {
"nodesList": list(self)}
619 """Reconstructs the state of the graph from the information persisted
622 quanta: DefaultDict[TaskDef, Set[Quantum]] = defaultdict(set)
623 quantumToNodeId: Dict[Quantum, NodeId] = {}
624 quantumNode: QuantumNode
625 for quantumNode
in state[
'nodesList']:
626 quanta[quantumNode.taskDef].add(quantumNode.quantum)
627 quantumToNodeId[quantumNode.quantum] = quantumNode.nodeId
628 _buildId = quantumNode.nodeId.buildId
if state[
'nodesList']
else None
629 self.
_buildGraphs(quanta, _quantumToNodeId=quantumToNodeId, _buildId=_buildId)
632 if not isinstance(other, QuantumGraph):
634 if len(self) != len(other):
637 if node
not in other:
643 return list(self.
taskGraph) == list(other.taskGraph)