Coverage for python/lsst/pipe/base/graph/graph.py: 16% of 417 statements (coverage.py v7.2.7, created at 2023-07-12 11:14 -0700)
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QuantumGraph", "IncompatibleGraphError")

import io
import json
import lzma
import os
import pickle
import struct
import time
import uuid
import warnings
from collections import defaultdict, deque
from collections.abc import Generator, Iterable, Mapping, MutableMapping
from itertools import chain
from types import MappingProxyType
from typing import Any, BinaryIO, TypeVar

import networkx as nx
from lsst.daf.butler import DatasetRef, DatasetType, DimensionRecordsAccumulator, DimensionUniverse, Quantum
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name
from networkx.drawing.nx_agraph import write_dot

from ..connections import iterConnections
from ..pipeline import TaskDef
from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner
from ._loadHelpers import LoadHelper
from ._versionDeserializers import DESERIALIZER_MAP
from .quantumNode import BuildId, QuantumNode

_T = TypeVar("_T", bound="QuantumGraph")

# Modify this constant any time the on-disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format of the preamble bytes in a file save.
# The base is a big-endian encoded unsigned short that holds the file format
# version. This allows the version bytes to be read first, to determine which
# loading code should be used for the rest of the file.
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big-endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream
# Version 2
# A big-endian encoded format with an unsigned long long byte stream used to
# indicate the total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# Magic bytes that help determine this is a graph save.
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"
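
# For illustration only: a minimal sketch of how the preamble defined above
# could be inspected by hand (the real logic lives in the load helpers and
# version deserializers; "graph.qgraph" is a hypothetical file name):
#
#     with open("graph.qgraph", "rb") as fd:
#         magic = fd.read(len(MAGIC_BYTES))
#         assert magic == MAGIC_BYTES, "not a QuantumGraph save"
#         (version,) = struct.unpack(
#             STRUCT_FMT_BASE, fd.read(struct.calcsize(STRUCT_FMT_BASE))
#         )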


class IncompatibleGraphError(Exception):
    """Exception class to indicate that a lookup by NodeId is impossible due
    to incompatibilities
    """

    pass


class QuantumGraph:
    """QuantumGraph is a directed acyclic graph of `QuantumNode` objects.

    This data structure represents a concrete workflow generated from a
    `Pipeline`.

    Parameters
    ----------
    quanta : `~collections.abc.Mapping` [ `TaskDef`, \
            `set` [ `~lsst.daf.butler.Quantum` ] ]
        This maps tasks (and their configs) to the sets of data they are to
        process.
    metadata : optional `~collections.abc.Mapping` of `str` to primitives
        This is an optional parameter of extra data to carry with the graph.
        Entries in this mapping should be able to be serialized in JSON.
    pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Set of dataset refs to exclude from the graph.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        The dimensions in which quanta can be defined. Need only be provided
        if no quanta have data IDs.
    initInputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitInput dataset refs. Dataset refs can be either
        resolved or non-resolved. Presently the same dataset refs are included
        in each `~lsst.daf.butler.Quantum` for the same task.
    initOutputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitOutput dataset refs. Dataset refs can be
        either resolved or non-resolved. For intermediate resolved refs their
        dataset ID must match ``initInputs`` and Quantum ``initInputs``.
    globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Dataset refs for some global objects produced by the pipeline. These
        objects include task configurations and package versions. Typically
        they have an empty DataId, but there is no real restriction on what
        can appear here.
    registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \
            optional
        Dataset types that are used by this graph; their definitions must
        match the registry. If the registry does not yet define a dataset
        type, it should match one that will be created later.

    Raises
    ------
    ValueError
        Raised if the graph is pruned such that some tasks no longer have
        nodes associated with them.
    """

    def __init__(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ):
        self._buildGraphs(
            quanta,
            metadata=metadata,
            pruneRefs=pruneRefs,
            universe=universe,
            initInputs=initInputs,
            initOutputs=initOutputs,
            globalInitOutputs=globalInitOutputs,
            registryDatasetTypes=registryDatasetTypes,
        )

    def _buildGraphs(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        *,
        _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
        _buildId: BuildId | None = None,
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        """Build the graph that is used to store the relations between tasks,
        and the graph that holds the relations between quanta.
        """
        self._metadata = metadata
        self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
        # Data structures used to identify relations between components;
        # DatasetTypeName -> TaskDef for task,
        # and DatasetRef -> QuantumNode for the quanta
        self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True)
        self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]()

        self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
        self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
        for taskDef, quantumSet in quanta.items():
            connections = taskDef.connections

            # For each type of connection in the task, add a key to the
            # `_DatasetTracker` for the connection's name, with a value of
            # the TaskDef in the appropriate field
            for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
                # Have to handle components in inputs.
                dataset_name, _, _ = inpt.name.partition(".")
                self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)

            for output in iterConnections(connections, ("outputs",)):
                # Have to handle possible components in outputs.
                dataset_name, _, _ = output.name.partition(".")
                self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)

            # For each `Quantum` in the set of all `Quantum` for this task,
            # add a key to the `_DatasetTracker` that is a `DatasetRef` for
            # one of the individual datasets inside the `Quantum`, with a
            # value of a newly created QuantumNode to the appropriate
            # input/output field.
            for quantum in quantumSet:
                if quantum.dataId is not None:
                    if universe is None:
                        universe = quantum.dataId.universe
                    elif universe != quantum.dataId.universe:
                        raise RuntimeError(
                            "Mismatched dimension universes in QuantumGraph construction: "
                            f"{universe} != {quantum.dataId.universe}. "
                        )

                if _quantumToNodeId:
                    if (nodeId := _quantumToNodeId.get(quantum)) is None:
                        raise ValueError(
                            "If _quantumToNodeId is not None, all quanta must have an "
                            "associated value in the mapping"
                        )
                else:
                    nodeId = uuid.uuid4()

                inits = quantum.initInputs.values()
                inputs = quantum.inputs.values()
                value = QuantumNode(quantum, taskDef, nodeId)
                self._taskToQuantumNode[taskDef].add(value)
                self._nodeIdMap[nodeId] = value

                for dsRef in chain(inits, inputs):
                    # unfortunately, `Quantum` allows inits to be individual
                    # `DatasetRef`s or an Iterable of such, so there must
                    # be an instance check here
                    if isinstance(dsRef, Iterable):
                        for sub in dsRef:
                            if sub.isComponent():
                                sub = sub.makeCompositeRef()
                            self._datasetRefDict.addConsumer(sub, value)
                    else:
                        assert isinstance(dsRef, DatasetRef)
                        if dsRef.isComponent():
                            dsRef = dsRef.makeCompositeRef()
                        self._datasetRefDict.addConsumer(dsRef, value)
                for dsRef in chain.from_iterable(quantum.outputs.values()):
                    self._datasetRefDict.addProducer(dsRef, value)

        if pruneRefs is not None:
            # track what refs were pruned and prune the graph
            prunes: set[QuantumNode] = set()
            _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes)

            # recreate the taskToQuantumNode dict removing nodes that have
            # been pruned. Keep track of task defs that now have no
            # QuantumNodes
            emptyTasks: set[str] = set()
            newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set)
            # accumulate all types
            types_ = set()
            # tracker for any pruneRefs that have caused tasks to have no
            # nodes. This helps the user find out what caused the issues seen.
            culprits = set()
            # Find all the types from the refs to prune
            for r in pruneRefs:
                types_.add(r.datasetType)

            # For each of the tasks and their associated nodes, remove any
            # nodes that were pruned. If there are no nodes associated
            # with a task, record that task, and find out if that was due to
            # a type from an input ref to prune.
            for td, taskNodes in self._taskToQuantumNode.items():
                diff = taskNodes.difference(prunes)
                if len(diff) == 0:
                    if len(taskNodes) != 0:
                        tp: DatasetType
                        for tp in types_:
                            if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set(
                                tmpRefs
                            ).difference(pruneRefs):
                                culprits.add(tp.name)
                    emptyTasks.add(td.label)
                newTaskToQuantumNode[td] = diff

            # update the internal dict
            self._taskToQuantumNode = newTaskToQuantumNode

            if emptyTasks:
                raise ValueError(
                    f"{', '.join(emptyTasks)} task(s) have no nodes associated with them "
                    f"after graph pruning; {', '.join(culprits)} caused over-pruning"
                )

        # Dimension universe
        if universe is None:
            raise RuntimeError(
                "Dimension universe or at least one quantum with a data ID "
                "must be provided when constructing a QuantumGraph."
            )
        self._universe = universe

        # Graph of quanta relations
        self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph()
        self._count = len(self._connectedQuanta)

        # Graph of task relations, used in various methods
        self._taskGraph = self._datasetDict.makeNetworkXGraph()

        # convert default dict into a regular dict to prevent accidental key
        # insertion
        self._taskToQuantumNode = dict(self._taskToQuantumNode.items())

        self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._globalInitOutputRefs: list[DatasetRef] = []
        self._registryDatasetTypes: list[DatasetType] = []
        if initInputs is not None:
            self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
        if initOutputs is not None:
            self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
        if globalInitOutputs is not None:
            self._globalInitOutputRefs = list(globalInitOutputs)
        if registryDatasetTypes is not None:
            self._registryDatasetTypes = list(registryDatasetTypes)

    @property
    def taskGraph(self) -> nx.DiGraph:
        """A graph representing the relations between the tasks inside
        the quantum graph (`networkx.DiGraph`).
        """
        return self._taskGraph

    @property
    def graph(self) -> nx.DiGraph:
        """A graph representing the relations between all the `QuantumNode`
        objects (`networkx.DiGraph`).

        The graph should usually be iterated over, or passed to methods of
        this class, but sometimes direct access to the ``networkx`` object
        may be helpful.
        """
        return self._connectedQuanta

    @property
    def inputQuanta(self) -> Iterable[QuantumNode]:
        """The nodes that are inputs to the graph (iterable [`QuantumNode`]).

        These are the nodes that do not depend on any other nodes in the
        graph.
        """
        return (q for q, n in self._connectedQuanta.in_degree if n == 0)

    @property
    def outputQuanta(self) -> Iterable[QuantumNode]:
        """The nodes that are outputs of the graph (iterable [`QuantumNode`]).

        These are the nodes that have no nodes that depend on them in the
        graph.
        """
        return [q for q, n in self._connectedQuanta.out_degree if n == 0]

    @property
    def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]:
        """All the dataset type names that are present in the graph
        (`tuple` [`str`]).

        These types do not include global init-outputs.
        """
        return tuple(self._datasetDict.keys())

    @property
    def isConnected(self) -> bool:
        """Whether all of the nodes in the graph are connected, ignoring
        directionality of connections (`bool`).
        """
        return nx.is_weakly_connected(self._connectedQuanta)

    def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T:
        r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s
        and nodes which depend on them.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef`
            Refs which should be removed from the resulting graph.

        Returns
        -------
        graph : `QuantumGraph`
            A graph that has been pruned of the specified refs and the nodes
            that depend on them.
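
        Examples
        --------
        A minimal illustrative sketch; assumes ``qgraph`` is an existing
        `QuantumGraph` and ``ref`` is a `~lsst.daf.butler.DatasetRef` known
        to appear in it:

        >>> pruned = qgraph.pruneGraphFromRefs([ref])  # doctest: +SKIP
        >>> len(pruned) <= len(qgraph)  # doctest: +SKIP
        True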
385 """
386 newInst = object.__new__(type(self))
387 quantumMap = defaultdict(set)
388 for node in self:
389 quantumMap[node.taskDef].add(node.quantum)
391 # convert to standard dict to prevent accidental key insertion
392 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())
394 # This should not change set of tasks in a graph, so we can keep the
395 # same registryDatasetTypes as in the original graph.
396 # TODO: Do we need to copy initInputs/initOutputs?
397 newInst._buildGraphs(
398 quantumDict,
399 _quantumToNodeId={n.quantum: n.nodeId for n in self},
400 metadata=self._metadata,
401 pruneRefs=refs,
402 universe=self._universe,
403 globalInitOutputs=self._globalInitOutputRefs,
404 registryDatasetTypes=self._registryDatasetTypes,
405 )
406 return newInst

    def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode:
        """Lookup a `QuantumNode` from an id associated with the node.

        Parameters
        ----------
        nodeId : `uuid.UUID`
            The id associated with a node.

        Returns
        -------
        node : `QuantumNode`
            The node corresponding to the input id.

        Raises
        ------
        KeyError
            Raised if the requested nodeId is not in the graph.
        """
        return self._nodeIdMap[nodeId]

    def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]:
        """Return all the `~lsst.daf.butler.Quantum` associated with a
        `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
            queried.

        Returns
        -------
        quanta : `frozenset` of `~lsst.daf.butler.Quantum`
            The `set` of `~lsst.daf.butler.Quantum` that is associated with
            the specified `TaskDef`.
        """
        return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ()))

    def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int:
        """Return the number of `~lsst.daf.butler.Quantum` associated with
        a `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
            queried.

        Returns
        -------
        count : `int`
            The number of `~lsst.daf.butler.Quantum` that are associated with
            the specified `TaskDef`.
        """
        return len(self._taskToQuantumNode.get(taskDef, ()))

    def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]:
        r"""Return all the `QuantumNode`\s associated with a `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `QuantumNode`\s are to be queried.

        Returns
        -------
        nodes : `frozenset` [ `QuantumNode` ]
            A `frozenset` of `QuantumNode` that is associated with the
            specified `TaskDef`.
        """
        return frozenset(self._taskToQuantumNode[taskDef])

    def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
        """Find all tasks that have the specified dataset type name as an
        input.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried,
            can also accept a `DatasetTypeName` which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        tasks : iterable of `TaskDef`
            `TaskDef` objects that have the specified `DatasetTypeName` as an
            input; the result will be empty if no tasks use the specified
            `DatasetTypeName` as an input.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
        """
        return (c for c in self._datasetDict.getConsumers(datasetTypeName))

    def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None:
        """Find the task that has the specified dataset type name as an
        output.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried,
            can also accept a `DatasetTypeName` which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : `TaskDef` or `None`
            `TaskDef` that produces `DatasetTypeName` as an output or `None`
            if none of the tasks produce this `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
        """
        return self._datasetDict.getProducer(datasetTypeName)

    def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
        """Find all tasks that are associated with the specified dataset type
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried,
            can also accept a `DatasetTypeName` which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : iterable of `TaskDef`
            `TaskDef` objects that are associated with the specified
            `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
        """
        return self._datasetDict.getAll(datasetTypeName)

    def findTaskDefByName(self, taskName: str) -> list[TaskDef]:
        """Determine which `TaskDef` objects in this graph are associated
        with a `str` representing a task name (looks at the ``taskName``
        property of `TaskDef` objects).

        Returns a list of `TaskDef` objects as a `PipelineTask` may appear
        multiple times in a graph with different labels.

        Parameters
        ----------
        taskName : `str`
            Name of a task to search for.

        Returns
        -------
        result : `list` of `TaskDef`
            List of the `TaskDef` objects that have the name specified.
            Multiple values are returned in the case that a task is used
            multiple times with different labels.
        """
        results = []
        for task in self._taskToQuantumNode.keys():
            split = task.taskName.split(".")
            if split[-1] == taskName:
                results.append(task)
        return results

    def findTaskDefByLabel(self, label: str) -> TaskDef | None:
        """Determine which `TaskDef` object in this graph is associated
        with a `str` representing a task's label.

        Parameters
        ----------
        label : `str`
            Label of a task to search for.

        Returns
        -------
        result : `TaskDef` or `None`
            The `TaskDef` object that has the specified label, or `None` if
            there is no such task.
        """
        for task in self._taskToQuantumNode.keys():
            if label == task.label:
                return task
        return None

    def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]:
        r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified
        `DatasetTypeName`.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type to search for as a string,
            can also accept a `DatasetTypeName` which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : `set` of `~lsst.daf.butler.Quantum`
            A `set` of `~lsst.daf.butler.Quantum` that contain the specified
            `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
        """
        tasks = self._datasetDict.getAll(datasetTypeName)
        result: set[Quantum] = set()
        result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task))
        return result

    def checkQuantumInGraph(self, quantum: Quantum) -> bool:
        """Check if specified quantum appears in the graph as part of a node.

        Parameters
        ----------
        quantum : `lsst.daf.butler.Quantum`
            The quantum to search for.

        Returns
        -------
        in_graph : `bool`
            The result of searching for the quantum.
        """
        for node in self:
            if quantum == node.quantum:
                return True
        return False

    def writeDotGraph(self, output: str | io.BufferedIOBase) -> None:
        """Write out the graph as a dot graph.

        Parameters
        ----------
        output : `str` or `io.BufferedIOBase`
            Either a filesystem path to write to, or a file handle object.
        """
        write_dot(self._connectedQuanta, output)

    def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T:
        """Create a new graph object that contains the subset of the nodes
        specified as input. Node ids are preserved.

        Parameters
        ----------
        nodes : `QuantumNode` or iterable of `QuantumNode`
            Nodes from which to create the subset.

        Returns
        -------
        graph : instance of graph type
            An instance of the type from which the subset was created.
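
        Examples
        --------
        A minimal illustrative sketch; assumes ``qgraph`` is an existing
        `QuantumGraph` with at least three nodes:

        >>> some_nodes = list(qgraph)[:3]  # doctest: +SKIP
        >>> sub = qgraph.subset(some_nodes)  # doctest: +SKIP
        >>> len(sub)  # doctest: +SKIP
        3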
669 """
670 if not isinstance(nodes, Iterable):
671 nodes = (nodes,)
672 quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes
673 quantumMap = defaultdict(set)
675 dataset_type_names: set[str] = set()
676 node: QuantumNode
677 for node in quantumSubgraph:
678 quantumMap[node.taskDef].add(node.quantum)
679 dataset_type_names.update(
680 dstype.name
681 for dstype in chain(
682 node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys()
683 )
684 )
686 # May need to trim dataset types from registryDatasetTypes.
687 for taskDef in quantumMap:
688 if refs := self.initOutputRefs(taskDef):
689 dataset_type_names.update(ref.datasetType.name for ref in refs)
690 dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs)
691 registryDatasetTypes = [
692 dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names
693 ]
695 # convert to standard dict to prevent accidental key insertion
696 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())
697 # Create an empty graph, and then populate it with custom mapping
698 newInst = type(self)({}, universe=self._universe)
699 # TODO: Do we need to copy initInputs/initOutputs?
700 newInst._buildGraphs(
701 quantumDict,
702 _quantumToNodeId={n.quantum: n.nodeId for n in nodes},
703 _buildId=self._buildId,
704 metadata=self._metadata,
705 universe=self._universe,
706 globalInitOutputs=self._globalInitOutputRefs,
707 registryDatasetTypes=registryDatasetTypes,
708 )
709 return newInst

    def subsetToConnected(self: _T) -> tuple[_T, ...]:
        """Generate a list of subgraphs where each is connected.

        Returns
        -------
        result : `list` of `QuantumGraph`
            A list of graphs that are each connected.
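
        Examples
        --------
        A minimal illustrative sketch; assumes ``qgraph`` is an existing
        `QuantumGraph`:

        >>> components = qgraph.subsetToConnected()  # doctest: +SKIP
        >>> all(g.isConnected for g in components)  # doctest: +SKIP
        True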
718 """
719 return tuple(
720 self.subset(connectedSet)
721 for connectedSet in nx.weakly_connected_components(self._connectedQuanta)
722 )

    def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
        """Return a set of `QuantumNode` that are direct inputs to a specified
        node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which inputs are to be determined.

        Returns
        -------
        inputs : `set` of `QuantumNode`
            All the nodes that are direct inputs to the specified node.
        """
        return set(pred for pred in self._connectedQuanta.predecessors(node))

    def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
        """Return a set of `QuantumNode` that are direct outputs of a
        specified node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which outputs are to be determined.

        Returns
        -------
        outputs : `set` of `QuantumNode`
            All the nodes that are direct outputs of the specified node.
        """
        return set(succ for succ in self._connectedQuanta.successors(node))

    def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
        """Return a graph of `QuantumNode` that are direct inputs and outputs
        of a specified node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which connected nodes are to be
            determined.

        Returns
        -------
        graph : graph of `QuantumNode`
            All the nodes that are directly connected to the specified node.
        """
        nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node))
        nodes.add(node)
        return self.subset(nodes)

    def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
        """Return a graph of the specified node and all the ancestor nodes
        directly reachable by walking edges.

        Parameters
        ----------
        node : `QuantumNode`
            The node for which all ancestors are to be determined.

        Returns
        -------
        ancestors : graph of `QuantumNode`
            Graph of the node and all of its ancestors.
        """
        predecessorNodes = nx.ancestors(self._connectedQuanta, node)
        predecessorNodes.add(node)
        return self.subset(predecessorNodes)

    def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]:
        """Check a graph for the presence of cycles and return the edges of
        any cycles found, or an empty list if there is no cycle.

        Returns
        -------
        result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ]
            A list of any graph edges that form a cycle, or an empty list if
            there is no cycle. An empty list is returned so that the
            ``if graph.findCycle():`` idiom works, since an empty list is
            falsy.
        """
        try:
            return nx.find_cycle(self._connectedQuanta)
        except nx.NetworkXNoCycle:
            return []

    def saveUri(self, uri: ResourcePathExpression) -> None:
        """Save `QuantumGraph` to the specified URI.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            URI to where the graph should be saved.
        """
        buffer = self._buildSaveObject()
        path = ResourcePath(uri)
        if path.getExtension() not in (".qgraph",):
            raise TypeError(f"Can currently only save a graph in qgraph format not {uri}")
        path.write(buffer)  # type: ignore # Ignore because bytearray is safe to use in place of bytes

    @property
    def metadata(self) -> MappingProxyType[str, Any] | None:
        """Extra data carried with the graph (mapping [`str`] or `None`).

        The mapping is a dynamic view of this object's metadata. Values
        should be able to be serialized in JSON.
        """
        if self._metadata is None:
            return None
        return MappingProxyType(self._metadata)

    def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
        """Return DatasetRefs for a given task InitInputs.

        Parameters
        ----------
        taskDef : `TaskDef`
            Task definition structure.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
            DatasetRefs for the task InitInputs; can be `None`. This can
            return either resolved or non-resolved references.
        """
        return self._initInputRefs.get(taskDef)

    def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
        """Return DatasetRefs for a given task InitOutputs.

        Parameters
        ----------
        taskDef : `TaskDef`
            Task definition structure.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
            DatasetRefs for the task InitOutputs; can be `None`. This can
            return either resolved or non-resolved references. A resolved
            reference will match a Quantum's initInputs if this is an
            intermediate dataset type.
        """
        return self._initOutputRefs.get(taskDef)

    def globalInitOutputRefs(self) -> list[DatasetRef]:
        """Return DatasetRefs for global InitOutputs.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ]
            DatasetRefs for global InitOutputs.
        """
        return self._globalInitOutputRefs

    def registryDatasetTypes(self) -> list[DatasetType]:
        """Return dataset types used by this graph; their definitions match
        dataset types from the registry.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetType` ]
            Dataset types for this graph.
        """
        return self._registryDatasetTypes

    @classmethod
    def loadUri(
        cls,
        uri: ResourcePathExpression,
        universe: DimensionUniverse | None = None,
        nodes: Iterable[uuid.UUID] | None = None,
        graphID: BuildId | None = None,
        minimumVersion: int = 3,
    ) -> QuantumGraph:
        """Read `QuantumGraph` from a URI.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            URI from where to load the graph.
        universe : `~lsst.daf.butler.DimensionUniverse`, optional
            `~lsst.daf.butler.DimensionUniverse` instance, not used by the
            method itself but needed to ensure that registry data structures
            are initialized. If `None` it is loaded from the `QuantumGraph`
            saved structure. If supplied, the
            `~lsst.daf.butler.DimensionUniverse` from the loaded
            `QuantumGraph` will be validated against the supplied argument
            for compatibility.
        nodes : iterable of `uuid.UUID` or `None`
            UUIDs that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to `None`, in which case all
            nodes will be loaded.
        graphID : `str` or `None`
            If specified, this ID is verified against the loaded graph prior
            to loading any nodes. This defaults to `None`, in which case no
            validation is done.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if the pickle contains an instance of a type other than
            `QuantumGraph`.
        ValueError
            Raised if one or more of the nodes requested is not in the
            `QuantumGraph`, if the graphID parameter does not match the graph
            being loaded, or if the supplied uri does not point at a valid
            `QuantumGraph` save file.
        RuntimeError
            Raised if the supplied `~lsst.daf.butler.DimensionUniverse` is
            not compatible with the `~lsst.daf.butler.DimensionUniverse`
            saved in the graph.

        Notes
        -----
        Reading Quanta from pickle requires the existence of a singleton
        `~lsst.daf.butler.DimensionUniverse` which is usually instantiated
        during `~lsst.daf.butler.Registry` initialization. To make sure
        that `~lsst.daf.butler.DimensionUniverse` exists this method
        accepts a dummy `~lsst.daf.butler.DimensionUniverse` argument.
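
        Examples
        --------
        A minimal illustrative round trip; assumes ``qgraph`` is an existing
        `QuantumGraph` and ``/tmp/example.qgraph`` is a writable,
        hypothetical path:

        >>> qgraph.saveUri("/tmp/example.qgraph")  # doctest: +SKIP
        >>> loaded = QuantumGraph.loadUri("/tmp/example.qgraph")  # doctest: +SKIP
        >>> loaded.graphID == qgraph.graphID  # doctest: +SKIP
        True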
951 """
952 uri = ResourcePath(uri)
953 # With ResourcePath we have the choice of always using a local file
954 # or reading in the bytes directly. Reading in bytes can be more
955 # efficient for reasonably-sized pickle files when the resource
956 # is remote. For now use the local file variant. For a local file
957 # as_local() does nothing.
959 if uri.getExtension() in (".pickle", ".pkl"):
960 with uri.as_local() as local, open(local.ospath, "rb") as fd:
961 warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
962 qgraph = pickle.load(fd)
963 elif uri.getExtension() in (".qgraph"):
964 with LoadHelper(uri, minimumVersion) as loader:
965 qgraph = loader.load(universe, nodes, graphID)
966 else:
967 raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`")
968 if not isinstance(qgraph, QuantumGraph):
969 raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}")
970 return qgraph

    @classmethod
    def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None:
        """Read the header of a `QuantumGraph` pointed to by the uri parameter
        and return it as a string.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            The location of the `QuantumGraph` to load. If the argument is a
            string, it must correspond to a valid
            `~lsst.resources.ResourcePath` path.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        header : `str` or `None`
            The header associated with the specified `QuantumGraph` if there
            is one, else `None`.

        Raises
        ------
        ValueError
            Raised if the `QuantumGraph` was saved as a pickle.
            Raised if the extension of the file specified by uri is not a
            `QuantumGraph` extension.
        """
        uri = ResourcePath(uri)
        if uri.getExtension() in (".pickle", ".pkl"):
            raise ValueError("Reading a header from a pickle save is not supported")
        elif uri.getExtension() in (".qgraph",):
            return LoadHelper(uri, minimumVersion).readHeader()
        else:
            raise ValueError("Only know how to handle files saved as `qgraph`")

    def buildAndPrintHeader(self) -> None:
        """Create a header that would be used in a save of this object and
        print it out to standard out.
        """
        _, header = self._buildSaveObject(returnHeader=True)
        print(json.dumps(header))

    def save(self, file: BinaryIO) -> None:
        """Save QuantumGraph to a file.

        Parameters
        ----------
        file : `io.BufferedIOBase`
            File to write the graph data to, opened in binary mode.
        """
        buffer = self._buildSaveObject()
        file.write(buffer)  # type: ignore # Ignore because bytearray is safe to use in place of bytes

    def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
        # make some containers
        jsonData: deque[bytes] = deque()
        # node map is a list because json does not accept mapping keys that
        # are not strings, so we store a list of key, value pairs that will
        # be converted to a mapping on load
        nodeMap = []
        taskDefMap = {}
        headerData: dict[str, Any] = {}

        # Store the QuantumGraph BuildId; this will allow validating BuildIds
        # at load time, prior to loading any QuantumNodes. Name chosen for
        # unlikely conflicts.
        headerData["GraphBuildID"] = self.graphID
        headerData["Metadata"] = self._metadata

        # Store the universe this graph was created with
        universeConfig = self._universe.dimensionConfig
        headerData["universe"] = universeConfig.toDict()

        # counter for the number of bytes processed thus far
        count = 0
        # serialize out the task Defs, recording the start and end bytes of
        # each taskDef
        inverseLookup = self._datasetDict.inverse
        taskDef: TaskDef
        # sort by task label to ensure serialization happens in the same order
        for taskDef in self.taskGraph:
            # compressing has very little impact on saving or load time, but
            # a large impact on on-disk size, so it is worth doing
            taskDescription: dict[str, Any] = {}
            # save the fully qualified name.
            taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
            # save the config as a text stream that will be un-persisted on
            # the other end
            stream = io.StringIO()
            taskDef.config.saveToStream(stream)
            taskDescription["config"] = stream.getvalue()
            taskDescription["label"] = taskDef.label
            if (refs := self._initInputRefs.get(taskDef)) is not None:
                taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
            if (refs := self._initOutputRefs.get(taskDef)) is not None:
                taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

            inputs = []
            outputs = []

            # Determine the connections between all of the tasks and save
            # that in the header as a list of connections and edges in each
            # task; this will help in un-persisting, and possibly in a
            # "quick view" method that does not require everything to be
            # un-persisted.
            #
            # Typing returns can't be parameter dependent
            for connection in inverseLookup[taskDef]:  # type: ignore
                consumers = self._datasetDict.getConsumers(connection)
                producer = self._datasetDict.getProducer(connection)
                if taskDef in consumers:
                    # This checks if the task consumes the connection directly
                    # from the datastore or it is produced by another task
                    producerLabel = producer.label if producer is not None else "datastore"
                    inputs.append((producerLabel, connection))
                elif taskDef not in consumers and producer is taskDef:
                    # If there are no consumers for this task's produced
                    # connection, the output will be said to be the datastore,
                    # in which case the for loop will be a zero length loop
                    if not consumers:
                        outputs.append(("datastore", connection))
                    for td in consumers:
                        outputs.append((td.label, connection))

            # dump to json string, and encode that string to bytes and then
            # compress those bytes
            dump = lzma.compress(json.dumps(taskDescription).encode())
            # record the sizing and relation information
            taskDefMap[taskDef.label] = {
                "bytes": (count, count + len(dump)),
                "inputs": inputs,
                "outputs": outputs,
            }
            count += len(dump)
            jsonData.append(dump)

        headerData["TaskDefs"] = taskDefMap

        # serialize the nodes, recording the start and end bytes of each node
        dimAccumulator = DimensionRecordsAccumulator()
        for node in self:
            # compressing has very little impact on saving or load time, but
            # a large impact on on-disk size, so it is worth doing
            simpleNode = node.to_simple(accumulator=dimAccumulator)

            dump = lzma.compress(simpleNode.json().encode())
            jsonData.append(dump)
            nodeMap.append(
                (
                    str(node.nodeId),
                    {
                        "bytes": (count, count + len(dump)),
                        "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                        "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                    },
                )
            )
            count += len(dump)

        headerData["DimensionRecords"] = {
            key: value.dict() for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
        }

        # Need to serialize this as a series of key, value tuples because of
        # a limitation in JSON: mapping keys can only be strings.
        headerData["Nodes"] = nodeMap

        if self._globalInitOutputRefs:
            headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

        if self._registryDatasetTypes:
            headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

        # dump the headerData to json
        header_encode = lzma.compress(json.dumps(headerData).encode())

        # record the sizes as 2 unsigned long long numbers for a total of 16
        # bytes
        save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

        fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
        map_lengths = struct.pack(fmt_string, len(header_encode))

        # write each component of the save out in a deterministic order
        # buffer = io.BytesIO()
        # buffer.write(map_lengths)
        # buffer.write(taskDef_pickle)
        # buffer.write(map_pickle)
        buffer = bytearray()
        buffer.extend(MAGIC_BYTES)
        buffer.extend(save_bytes)
        buffer.extend(map_lengths)
        buffer.extend(header_encode)
        # Iterate over the length of jsonData, and for each element pop the
        # leftmost element off the deque and write it out. This is to save
        # memory: as the data is added to the buffer object, it is removed
        # from the container.
        #
        # Only this section needs to worry about memory pressure because
        # everything else written to the buffer prior to this data is
        # only on the order of kilobytes to low numbers of megabytes.
        while jsonData:
            buffer.extend(jsonData.popleft())
        if returnHeader:
            return buffer, headerData
        else:
            return buffer

    @classmethod
    def load(
        cls,
        file: BinaryIO,
        universe: DimensionUniverse | None = None,
        nodes: Iterable[uuid.UUID] | None = None,
        graphID: BuildId | None = None,
        minimumVersion: int = 3,
    ) -> QuantumGraph:
        """Read `QuantumGraph` from a file that was made by `save`.

        Parameters
        ----------
        file : `io.IO` of bytes
            File with the graph data open in binary mode.
        universe : `~lsst.daf.butler.DimensionUniverse`, optional
            `~lsst.daf.butler.DimensionUniverse` instance, not used by the
            method itself but needed to ensure that registry data structures
            are initialized. If `None` it is loaded from the `QuantumGraph`
            saved structure. If supplied, the
            `~lsst.daf.butler.DimensionUniverse` from the loaded
            `QuantumGraph` will be validated against the supplied argument
            for compatibility.
        nodes : iterable of `uuid.UUID` or `None`
            UUIDs that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to `None`, in which case all
            nodes will be loaded.
        graphID : `str` or `None`
            If specified, this ID is verified against the loaded graph prior
            to loading any nodes. This defaults to `None`, in which case no
            validation is done.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if the pickle contains an instance of a type other than
            `QuantumGraph`.
        ValueError
            Raised if one or more of the nodes requested is not in the
            `QuantumGraph`, if the graphID parameter does not match the graph
            being loaded, or if the supplied uri does not point at a valid
            `QuantumGraph` save file.

        Notes
        -----
        Reading Quanta from pickle requires the existence of a singleton
        `~lsst.daf.butler.DimensionUniverse` which is usually instantiated
        during `~lsst.daf.butler.Registry` initialization. To make sure that
        `~lsst.daf.butler.DimensionUniverse` exists this method accepts a
        dummy `~lsst.daf.butler.DimensionUniverse` argument.
        """
        # Try to see if the file handle contains pickle data; this will be
        # removed in the future.
        try:
            qgraph = pickle.load(file)
            warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
        except pickle.UnpicklingError:
            with LoadHelper(file, minimumVersion) as loader:
                qgraph = loader.load(universe, nodes, graphID)
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(f"QuantumGraph pickle file contains unexpected object type: {type(qgraph)}")
        return qgraph

    def iterTaskGraph(self) -> Generator[TaskDef, None, None]:
        """Iterate over the `taskGraph` attribute in topological order.

        Yields
        ------
        taskDef : `TaskDef`
            `TaskDef` objects in topological order.
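
        Examples
        --------
        A minimal illustrative sketch; assumes ``qgraph`` is an existing
        `QuantumGraph`:

        >>> for taskDef in qgraph.iterTaskGraph():  # doctest: +SKIP
        ...     print(taskDef.label, qgraph.getNumberOfQuantaForTask(taskDef))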
1261 """
1262 yield from nx.topological_sort(self.taskGraph)

    def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None:
        """Change output run and dataset ID for each output dataset.

        Parameters
        ----------
        run : `str`
            New output run name.
        metadata_key : `str` or `None`
            Specifies the metadata key corresponding to the output run name,
            to be updated with the new run name. If `None` or if the metadata
            is missing, it is not updated. If the metadata is present but the
            key is missing, it will be added.
        update_graph_id : `bool`, optional
            If `True` then also update the graph ID with a new unique value.
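
        Examples
        --------
        A minimal illustrative sketch; assumes ``qgraph`` is an existing
        `QuantumGraph` and ``"u/user/rerun"`` is a hypothetical run name:

        >>> qgraph.updateRun("u/user/rerun", update_graph_id=True)  # doctest: +SKIP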
1278 """
1280 def _update_refs_in_place(refs: list[DatasetRef], run: str) -> None:
1281 """Update list of `~lsst.daf.butler.DatasetRef` with new run and
1282 dataset IDs.
1283 """
1284 for ref in refs:
1285 # hack the run to be replaced explicitly
1286 object.__setattr__(ref, "run", run)
1288 # Loop through all outputs and update their datasets.
1289 for node in self._connectedQuanta:
1290 for refs in node.quantum.outputs.values():
1291 _update_refs_in_place(refs, run)
1293 for refs in self._initOutputRefs.values():
1294 _update_refs_in_place(refs, run)
1296 _update_refs_in_place(self._globalInitOutputRefs, run)
1298 # Update all intermediates from their matching outputs.
1299 for node in self._connectedQuanta:
1300 for refs in node.quantum.inputs.values():
1301 _update_refs_in_place(refs, run)
1303 for refs in self._initInputRefs.values():
1304 _update_refs_in_place(refs, run)
1306 if update_graph_id:
1307 self._buildId = BuildId(f"{time.time()}-{os.getpid()}")
1309 # Update metadata if present.
1310 if self._metadata is not None and metadata_key is not None:
1311 metadata = dict(self._metadata)
1312 metadata[metadata_key] = run
1313 self._metadata = metadata

    @property
    def graphID(self) -> BuildId:
        """The ID generated by the graph at construction time (`str`)."""
        return self._buildId

    @property
    def universe(self) -> DimensionUniverse:
        """Dimension universe associated with this graph
        (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self._universe

    def __iter__(self) -> Generator[QuantumNode, None, None]:
        yield from nx.topological_sort(self._connectedQuanta)

    def __len__(self) -> int:
        return self._count

    def __contains__(self, node: QuantumNode) -> bool:
        return self._connectedQuanta.has_node(node)

    def __getstate__(self) -> dict:
        """Store a compact form of the graph as a list of graph nodes, and a
        tuple of task labels and task configs. The full graph can be
        reconstructed with this information, and it preserves the ordering of
        the graph nodes.
        """
        universe: DimensionUniverse | None = None
        for node in self:
            dId = node.quantum.dataId
            if dId is None:
                continue
            universe = dId.graph.universe
        return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe}

    def __setstate__(self, state: dict) -> None:
        """Reconstruct the state of the graph from the information persisted
        in getstate.
        """
        buffer = io.BytesIO(state["reduced"])
        with LoadHelper(buffer, minimumVersion=3) as loader:
            qgraph = loader.load(state["universe"], graphID=state["graphId"])

        self._metadata = qgraph._metadata
        self._buildId = qgraph._buildId
        self._datasetDict = qgraph._datasetDict
        self._nodeIdMap = qgraph._nodeIdMap
        self._count = len(qgraph)
        self._taskToQuantumNode = qgraph._taskToQuantumNode
        self._taskGraph = qgraph._taskGraph
        self._connectedQuanta = qgraph._connectedQuanta
        self._initInputRefs = qgraph._initInputRefs
        self._initOutputRefs = qgraph._initOutputRefs

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, QuantumGraph):
            return False
        if len(self) != len(other):
            return False
        for node in self:
            if node not in other:
                return False
            if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node):
                return False
            if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node):
                return False
        if set(self.allDatasetTypes) != set(other.allDatasetTypes):
            return False
        return set(self.taskGraph) == set(other.taskGraph)