21 from __future__
import annotations
23 __all__ = (
"QuantumGraph",
"IncompatibleGraphError")
25 from collections
import defaultdict
27 from itertools
import chain, count
30 from networkx.drawing.nx_agraph
import write_dot
34 from typing
import (DefaultDict, Dict, FrozenSet, Iterable, List, Mapping, Set, Generator, Optional, Tuple,
37 from ..connections
import iterConnections
38 from ..pipeline
import TaskDef
39 from lsst.daf.butler
import Quantum, DatasetRef
41 from ._implDetails
import _DatasetTracker, DatasetTypeName
42 from .quantumNode
import QuantumNode, NodeId, BuildId
44 _T = TypeVar(
"_T", bound=
"QuantumGraph")
48 """Exception class to indicate that a lookup by NodeId is impossible due
55 """QuantumGraph is a directed acyclic graph of `QuantumNode`s
57 This data structure represents a concrete workflow generated from a
62 quanta : Mapping of `TaskDef` to sets of `Quantum`
63 This maps tasks (and their configs) to the sets of data they are to
66 def __init__(self, quanta: Mapping[TaskDef, Set[Quantum]]):
69 def _buildGraphs(self,
70 quanta: Mapping[TaskDef, Set[Quantum]],
72 _quantumToNodeId: Optional[Mapping[Quantum, NodeId]] =
None,
73 _buildId: Optional[BuildId] =
None):
74 """Builds the graph that is used to store the relation between tasks,
75 and the graph that holds the relations between quanta
78 self.
_buildId = _buildId
if _buildId
is not None else BuildId(f
"{time.time()}-{os.getpid()}")
82 self.
_datasetDict = _DatasetTracker[DatasetTypeName, TaskDef]()
85 nodeNumberGenerator = count()
86 self._nodeIdMap: Dict[NodeId, QuantumNode] = {}
88 for taskDef, quantumSet
in self.
_quanta.items():
89 connections = taskDef.connections
94 for inpt
in iterConnections(connections, (
"inputs",
"prerequisiteInputs",
"initInputs")):
105 self.
_count += len(quantumSet)
106 for quantum
in quantumSet:
108 nodeId = _quantumToNodeId.get(quantum)
110 raise ValueError(
"If _quantuMToNodeNumber is not None, all quanta must have an "
111 "associated value in the mapping")
115 inits = quantum.initInputs.values()
116 inputs = quantum.inputs.values()
118 self._nodeIdMap[nodeId] = value
120 for dsRef
in chain(inits, inputs):
124 if isinstance(dsRef, Iterable):
129 for dsRef
in chain.from_iterable(quantum.outputs.values()):
140 """Return a graph representing the relations between the tasks inside
145 taskGraph : `networkx.DiGraph`
146 Internal datastructure that holds relations of `TaskDef`s
152 """Return a graph representing the relations between all the
153 `QuantumNode`s. Largely it should be preferred to iterate over, and use
154 methods of this class, but sometimes direct access to the networkx
155 object may be helpful
159 graph : `networkx.DiGraph`
160 Internal datastructure that holds relations of `QuantumNode`s
166 """Make a `list` of all `QuantumNode`s that are 'input' nodes to the
167 graph, meaning those nodes do not depend on any other nodes in the
172 inputNodes : iterable of `QuantumNode`
173 A list of nodes that are inputs to the graph
179 """Make a `list` of all `QuantumNode`s that are 'output' nodes to the
180 graph, meaning those nodes have no nodes that depend on them in the graph.
184 outputNodes : iterable of `QuantumNode`
185 A list of nodes that are outputs of the graph
191 """Return all the `DatasetTypeNames` that are contained inside the
196 tuple of `DatasetTypeName`
197 All the data set type names that are present in the graph
203 """Return True if all of the nodes in the graph are connected, ignores
204 directionality of connections.
209 """Lookup a `QuantumNode` from an id associated with the node.
214 The number associated with a node
219 The node corresponding with input number
224 Raised if the requested nodeId is not in the graph.
225 IncompatibleGraphError
226 Raised if the nodeId was built with a graph that is not
227 this instance (or a graph instance that produced this instance
228 through an operation such as subset)
232 return self._nodeIdMap[nodeId]
235 """Return all the `Quantum` associated with a `TaskDef`.
240 The `TaskDef` for which `Quantum` are to be queried
244 frozenset of `Quantum`
245 The `set` of `Quantum` that is associated with the specified
248 return frozenset(self.
_quanta[taskDef])
251 """Find all tasks that have the specified dataset type name as an
256 datasetTypeName : `str`
257 A string representing the name of a dataset type to be queried,
258 can also accept a `DatasetTypeName` which is a `NewType` of str for
259 type safety in static type checking.
263 tasks : iterable of `TaskDef`
264 `TaskDef`s that have the specified `DatasetTypeName` as an input,
265 list will be empty if no tasks use specified `DatasetTypeName` as
271 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
273 return (c
for c
in self.
_datasetDict.getInputs(datasetTypeName))
276 """Find all tasks that have the specified dataset type name as an
281 datasetTypeName : `str`
282 A string representing the name of a dataset type to be queried,
283 can also accept a `DatasetTypeName` which is a `NewType` of str for
284 type safety in static type checking.
289 `TaskDef` that outputs `DatasetTypeName` as an output or None if
290 none of the tasks produce this `DatasetTypeName`.
295 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
300 """Find all tasks that are associated with the specified dataset type
305 datasetTypeName : `str`
306 A string representing the name of a dataset type to be queried,
307 can also accept a `DatasetTypeName` which is a `NewType` of str for
308 type safety in static type checking.
312 result : iterable of `TaskDef`
313 `TaskDef`s that are associated with the specified `DatasetTypeName`
318 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
322 if output
is not None:
323 results = chain(results, (output,))
327 """Determine which `TaskDef`s in this graph are associated with a `str`
328 representing a task name (looks at the taskName property of
331 Returns a list of `TaskDef`s as a `PipelineTask` may appear multiple
332 times in a graph with different labels.
337 Name of a task to search for
341 result : list of `TaskDef`
342 List of the `TaskDef`s that have the name specified. Multiple
343 values are returned in the case that a task is used multiple times
344 with different labels.
347 for task
in self.
_quanta.keys():
348 split = task.taskName.split(
'.')
349 if split[-1] == taskName:
354 """Determine which `TaskDef`s in this graph are associated with a `str`
355 representing a task's label.
360 Name of a task to search for
365 `TaskDef`s that have the specified label.
367 for task
in self.
_quanta.keys():
368 if label == task.label:
373 """Return all the `Quantum` that contain a specified `DatasetTypeName`.
377 datasetTypeName : `str`
378 The name of the dataset type to search for as a string,
379 can also accept a `DatasetTypeName` which is a `NewType` of str for
380 type safety in static type checking.
384 result : `set` of `Quantum`
385 A `set` of `Quantum` that contain specified `DatasetTypeName`
390 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
394 result: Set[Quantum] = set()
395 result = result.union(*(self.
_quanta[task]
for task
in tasks))
399 """Check if specified quantum appears in the graph as part of a node.
404 The quantum to search for
409 The result of searching for the quantum
411 for qset
in self.
_quanta.values():
417 """Write out the graph as a dot graph.
421 output : str or `io.BufferedIOBase`
422 Either a filesystem path to write to, or a file handle object
426 def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T:
427 """Create a new graph object that contains the subset of the nodes
428 specified as input. Node number is preserved.
432 nodes : `QuantumNode` or iterable of `QuantumNode`
436 graph : instance of graph type
437 An instance of the type from which the subset was created
439 if not isinstance(nodes, Iterable):
442 quantumMap = defaultdict(set)
445 for node
in quantumSubgraph:
446 quantumMap[node.taskDef].add(node.quantum)
448 newInst = type(self)({})
449 newInst._buildGraphs(quantumMap, _quantumToNodeId={n.quantum: n.nodeId
for n
in nodes},
454 """Generate a list of subgraphs where each is connected.
458 result : tuple of `QuantumGraph`
459 A tuple of graphs that are each connected
461 return tuple(self.
subset(connectedSet)
465 """Return a set of `QuantumNode` that are direct inputs to a specified
471 The node of the graph for which inputs are to be determined
476 All the nodes that are direct inputs to specified node
481 """Return a set of `QuantumNode` that are direct outputs of a specified
487 The node of the graph for which outputs are to be determined
492 All the nodes that are direct outputs to specified node
497 """Return a graph of `QuantumNode` that are direct inputs and outputs
503 The node of the graph for which connected nodes are to be
508 graph : graph of `QuantumNode`
509 All the nodes that are directly connected to specified node
516 """Return a graph of the specified node and all the ancestor nodes
517 directly reachable by walking edges.
522 The node for which all ancestors are to be determined
526 graph of `QuantumNode`
527 Graph of node and all of its ancestors
530 predecessorNodes.add(node)
531 return self.
subset(predecessorNodes)
533 def findCycle(self) -> List[Tuple[QuantumNode, QuantumNode]]:
534 """Check a graph for the presence of cycles and returns the edges of
535 any cycles found, or an empty list if there is no cycle.
539 result : list of tuple of `QuantumNode`, `QuantumNode`
540 A list of any graph edges that form a cycle, or an empty list if
541 there is no cycle. An empty list is returned to support the
542 `if graph.findCycle():` syntax, as an empty list is falsy.
546 except nx.NetworkXNoCycle:
550 """Save QuantumGraph to a file.
551 Presently we store QuantumGraph in pickle format, this could
552 potentially change in the future if better format is found.
556 file : `io.BufferedIOBase`
557 File to write pickle data open in binary mode.
559 pickle.dump(self, file)
563 """Read QuantumGraph from a file that was made by `save`.
567 file : `io.BufferedIOBase`
568 File with pickle data open in binary mode.
569 universe: `~lsst.daf.butler.DimensionUniverse`
570 DimensionUniverse instance, not used by the method itself but
571 needed to ensure that registry data structures are initialized.
575 graph : `QuantumGraph`
576 Resulting QuantumGraph instance.
581 Raised if pickle contains instance of a type other than
585 Reading Quanta from pickle requires existence of singleton
586 DimensionUniverse which is usually instantiated during Registry
587 initialization. To make sure that DimensionUniverse exists this method
588 accepts dummy DimensionUniverse argument.
590 qgraph = pickle.load(file)
591 if not isinstance(qgraph, QuantumGraph):
592 raise TypeError(f
"QuantumGraph pickle file has contains unexpected object type: {type(qgraph)}")
596 """Iterate over the `taskGraph` attribute in topological order
601 `TaskDef` objects in topological order
603 yield from nx.topological_sort(self.
taskGraph)
605 def __iter__(self) -> Generator[QuantumNode, None, None]:
615 """Stores a compact form of the graph as a list of graph nodes, and a
616 tuple of task labels and task configs. The full graph can be
617 reconstructed with this information, and it preserves the ordering of
620 return {
"nodesList": list(self)}
623 """Reconstructs the state of the graph from the information persisted
626 quanta: DefaultDict[TaskDef, Set[Quantum]] = defaultdict(set)
627 quantumToNodeId: Dict[Quantum, NodeId] = {}
628 quantumNode: QuantumNode
629 for quantumNode
in state[
'nodesList']:
630 quanta[quantumNode.taskDef].add(quantumNode.quantum)
631 quantumToNodeId[quantumNode.quantum] = quantumNode.nodeId
632 _buildId = quantumNode.nodeId.buildId
if state[
'nodesList']
else None
633 self.
_buildGraphs(quanta, _quantumToNodeId=quantumToNodeId, _buildId=_buildId)
636 if not isinstance(other, QuantumGraph):
638 if len(self) != len(other):
641 if node
not in other:
647 return list(self.
taskGraph) == list(other.taskGraph)