Coverage for python/lsst/pipe/base/graph/graph.py: 19%
413 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-23 10:31 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("QuantumGraph", "IncompatibleGraphError")
25import io
26import json
27import lzma
28import os
29import struct
30import time
31import uuid
32from collections import defaultdict, deque
33from collections.abc import Generator, Iterable, Iterator, Mapping, MutableMapping
34from itertools import chain
35from types import MappingProxyType
36from typing import Any, BinaryIO, TypeVar
38import networkx as nx
39from lsst.daf.butler import (
40 DatasetId,
41 DatasetRef,
42 DatasetType,
43 DimensionRecordsAccumulator,
44 DimensionUniverse,
45 Quantum,
46)
47from lsst.resources import ResourcePath, ResourcePathExpression
48from lsst.utils.introspection import get_full_type_name
49from networkx.drawing.nx_agraph import write_dot
51from ..connections import iterConnections
52from ..pipeline import TaskDef
53from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner
54from ._loadHelpers import LoadHelper
55from ._versionDeserializers import DESERIALIZER_MAP
56from .quantumNode import BuildId, QuantumNode
# Type variable bound to QuantumGraph so methods that return a new graph
# (e.g. `subset`) preserve the subclass type of `self`.
_T = TypeVar("_T", bound="QuantumGraph")

# Modify this constant any time the on disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format for the preamble bytes in a file save.
# The base is a big endian encoded unsigned short that is used to hold the
# file format version. This allows reading the version bytes to determine
# which loading code should be used for the rest of the file.
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream
# Version 2
# A big endian encoded format with an unsigned long long byte stream used to
# indicate the total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# magic bytes that help determine this is a graph save
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"
class IncompatibleGraphError(Exception):
    """Raised when a lookup by NodeId cannot be satisfied because the
    identifier is incompatible with this graph.
    """
90class QuantumGraph:
91 """QuantumGraph is a directed acyclic graph of `QuantumNode` objects
93 This data structure represents a concrete workflow generated from a
94 `Pipeline`.
96 Parameters
97 ----------
98 quanta : `~collections.abc.Mapping` [ `TaskDef`, \
99 `set` [ `~lsst.daf.butler.Quantum` ] ]
100 This maps tasks (and their configs) to the sets of data they are to
101 process.
102 metadata : Optional `~collections.abc.Mapping` of `str` to primitives
103 This is an optional parameter of extra data to carry with the graph.
104 Entries in this mapping should be able to be serialized in JSON.
105 pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
106 Set of dataset refs to exclude from a graph.
107 universe : `~lsst.daf.butler.DimensionUniverse`, optional
108 The dimensions in which quanta can be defined. Need only be provided if
109 no quanta have data IDs.
110 initInputs : `~collections.abc.Mapping`, optional
111 Maps tasks to their InitInput dataset refs. Dataset refs can be either
112 resolved or non-resolved. Presently the same dataset refs are included
113 in each `~lsst.daf.butler.Quantum` for the same task.
114 initOutputs : `~collections.abc.Mapping`, optional
115 Maps tasks to their InitOutput dataset refs. Dataset refs can be either
116 resolved or non-resolved. For intermediate resolved refs their dataset
117 ID must match ``initInputs`` and Quantum ``initInputs``.
118 globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
119 Dataset refs for some global objects produced by pipeline. These
120 objects include task configurations and package versions. Typically
121 they have an empty DataId, but there is no real restriction on what
122 can appear here.
123 registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \
124 optional
125 Dataset types which are used by this graph, their definitions must
126 match registry. If registry does not define dataset type yet, then
127 it should match one that will be created later.
129 Raises
130 ------
131 ValueError
132 Raised if the graph is pruned such that some tasks no longer have nodes
133 associated with them.
134 """
136 def __init__(
137 self,
138 quanta: Mapping[TaskDef, set[Quantum]],
139 metadata: Mapping[str, Any] | None = None,
140 pruneRefs: Iterable[DatasetRef] | None = None,
141 universe: DimensionUniverse | None = None,
142 initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
143 initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
144 globalInitOutputs: Iterable[DatasetRef] | None = None,
145 registryDatasetTypes: Iterable[DatasetType] | None = None,
146 ):
147 self._buildGraphs(
148 quanta,
149 metadata=metadata,
150 pruneRefs=pruneRefs,
151 universe=universe,
152 initInputs=initInputs,
153 initOutputs=initOutputs,
154 globalInitOutputs=globalInitOutputs,
155 registryDatasetTypes=registryDatasetTypes,
156 )
158 def _buildGraphs(
159 self,
160 quanta: Mapping[TaskDef, set[Quantum]],
161 *,
162 _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
163 _buildId: BuildId | None = None,
164 metadata: Mapping[str, Any] | None = None,
165 pruneRefs: Iterable[DatasetRef] | None = None,
166 universe: DimensionUniverse | None = None,
167 initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
168 initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
169 globalInitOutputs: Iterable[DatasetRef] | None = None,
170 registryDatasetTypes: Iterable[DatasetType] | None = None,
171 ) -> None:
172 """Build the graph that is used to store the relation between tasks,
173 and the graph that holds the relations between quanta
174 """
175 self._metadata = metadata
176 self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
177 # Data structures used to identify relations between components;
178 # DatasetTypeName -> TaskDef for task,
179 # and DatasetRef -> QuantumNode for the quanta
180 self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True)
181 self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]()
183 self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
184 self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
185 for taskDef, quantumSet in quanta.items():
186 connections = taskDef.connections
188 # For each type of connection in the task, add a key to the
189 # `_DatasetTracker` for the connections name, with a value of
190 # the TaskDef in the appropriate field
191 for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
192 # Have to handle components in inputs.
193 dataset_name, _, _ = inpt.name.partition(".")
194 self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)
196 for output in iterConnections(connections, ("outputs",)):
197 # Have to handle possible components in outputs.
198 dataset_name, _, _ = output.name.partition(".")
199 self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)
201 # For each `Quantum` in the set of all `Quantum` for this task,
202 # add a key to the `_DatasetTracker` that is a `DatasetRef` for one
203 # of the individual datasets inside the `Quantum`, with a value of
204 # a newly created QuantumNode to the appropriate input/output
205 # field.
206 for quantum in quantumSet:
207 if quantum.dataId is not None:
208 if universe is None:
209 universe = quantum.dataId.universe
210 elif universe != quantum.dataId.universe:
211 raise RuntimeError(
212 "Mismatched dimension universes in QuantumGraph construction: "
213 f"{universe} != {quantum.dataId.universe}. "
214 )
216 if _quantumToNodeId:
217 if (nodeId := _quantumToNodeId.get(quantum)) is None:
218 raise ValueError(
219 "If _quantuMToNodeNumber is not None, all quanta must have an "
220 "associated value in the mapping"
221 )
222 else:
223 nodeId = uuid.uuid4()
225 inits = quantum.initInputs.values()
226 inputs = quantum.inputs.values()
227 value = QuantumNode(quantum, taskDef, nodeId)
228 self._taskToQuantumNode[taskDef].add(value)
229 self._nodeIdMap[nodeId] = value
231 for dsRef in chain(inits, inputs):
232 # unfortunately, `Quantum` allows inits to be individual
233 # `DatasetRef`s or an Iterable of such, so there must
234 # be an instance check here
235 if isinstance(dsRef, Iterable):
236 for sub in dsRef:
237 if sub.isComponent():
238 sub = sub.makeCompositeRef()
239 self._datasetRefDict.addConsumer(sub, value)
240 else:
241 assert isinstance(dsRef, DatasetRef)
242 if dsRef.isComponent():
243 dsRef = dsRef.makeCompositeRef()
244 self._datasetRefDict.addConsumer(dsRef, value)
245 for dsRef in chain.from_iterable(quantum.outputs.values()):
246 self._datasetRefDict.addProducer(dsRef, value)
248 if pruneRefs is not None:
249 # track what refs were pruned and prune the graph
250 prunes: set[QuantumNode] = set()
251 _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes)
253 # recreate the taskToQuantumNode dict removing nodes that have been
254 # pruned. Keep track of task defs that now have no QuantumNodes
255 emptyTasks: set[str] = set()
256 newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set)
257 # accumulate all types
258 types_ = set()
259 # tracker for any pruneRefs that have caused tasks to have no nodes
260 # This helps the user find out what caused the issues seen.
261 culprits = set()
262 # Find all the types from the refs to prune
263 for r in pruneRefs:
264 types_.add(r.datasetType)
266 # For each of the tasks, and their associated nodes, remove any
267 # any nodes that were pruned. If there are no nodes associated
268 # with a task, record that task, and find out if that was due to
269 # a type from an input ref to prune.
270 for td, taskNodes in self._taskToQuantumNode.items():
271 diff = taskNodes.difference(prunes)
272 if len(diff) == 0:
273 if len(taskNodes) != 0:
274 tp: DatasetType
275 for tp in types_:
276 if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set(
277 tmpRefs
278 ).difference(pruneRefs):
279 culprits.add(tp.name)
280 emptyTasks.add(td.label)
281 newTaskToQuantumNode[td] = diff
283 # update the internal dict
284 self._taskToQuantumNode = newTaskToQuantumNode
286 if emptyTasks:
287 raise ValueError(
288 f"{', '.join(emptyTasks)} task(s) have no nodes associated with them "
289 f"after graph pruning; {', '.join(culprits)} caused over-pruning"
290 )
292 # Dimension universe
293 if universe is None:
294 raise RuntimeError(
295 "Dimension universe or at least one quantum with a data ID "
296 "must be provided when constructing a QuantumGraph."
297 )
298 self._universe = universe
300 # Graph of quanta relations
301 self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph()
302 self._count = len(self._connectedQuanta)
304 # Graph of task relations, used in various methods
305 self._taskGraph = self._datasetDict.makeNetworkXGraph()
307 # convert default dict into a regular to prevent accidental key
308 # insertion
309 self._taskToQuantumNode = dict(self._taskToQuantumNode.items())
311 self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
312 self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
313 self._globalInitOutputRefs: list[DatasetRef] = []
314 self._registryDatasetTypes: list[DatasetType] = []
315 if initInputs is not None:
316 self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
317 if initOutputs is not None:
318 self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
319 if globalInitOutputs is not None:
320 self._globalInitOutputRefs = list(globalInitOutputs)
321 if registryDatasetTypes is not None:
322 self._registryDatasetTypes = list(registryDatasetTypes)
324 @property
325 def taskGraph(self) -> nx.DiGraph:
326 """A graph representing the relations between the tasks inside
327 the quantum graph (`networkx.DiGraph`).
328 """
329 return self._taskGraph
331 @property
332 def graph(self) -> nx.DiGraph:
333 """A graph representing the relations between all the `QuantumNode`
334 objects (`networkx.DiGraph`).
336 The graph should usually be iterated over, or passed to methods of this
337 class, but sometimes direct access to the ``networkx`` object may be
338 helpful.
339 """
340 return self._connectedQuanta
342 @property
343 def inputQuanta(self) -> Iterable[QuantumNode]:
344 """The nodes that are inputs to the graph (iterable [`QuantumNode`]).
346 These are the nodes that do not depend on any other nodes in the
347 graph.
348 """
349 return (q for q, n in self._connectedQuanta.in_degree if n == 0)
351 @property
352 def outputQuanta(self) -> Iterable[QuantumNode]:
353 """The nodes that are outputs of the graph (iterable [`QuantumNode`]).
355 These are the nodes that have no nodes that depend on them in the
356 graph.
357 """
358 return [q for q, n in self._connectedQuanta.out_degree if n == 0]
360 @property
361 def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]:
362 """All the data set type names that are present in the graph
363 (`tuple` [`str`]).
365 These types do not include global init-outputs.
366 """
367 return tuple(self._datasetDict.keys())
    @property
    def isConnected(self) -> bool:
        """Whether all of the nodes in the graph are connected, ignoring
        directionality of connections (`bool`).
        """
        # Weak connectivity treats the directed graph as undirected; a graph
        # split into independent components is reported as not connected.
        return nx.is_weakly_connected(self._connectedQuanta)
    def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T:
        r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s
        and nodes which depend on them.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef`
            Refs which should be removed from resulting graph

        Returns
        -------
        graph : `QuantumGraph`
            A graph that has been pruned of specified refs and the nodes that
            depend on them.
        """
        # Bypass __init__: _buildGraphs performs the full initialization,
        # and we want to pass it the private _quantumToNodeId mapping so the
        # surviving nodes keep their original UUIDs.
        newInst = object.__new__(type(self))
        quantumMap = defaultdict(set)
        for node in self:
            quantumMap[node.taskDef].add(node.quantum)

        # convert to standard dict to prevent accidental key insertion
        quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())

        # This should not change set of tasks in a graph, so we can keep the
        # same registryDatasetTypes as in the original graph.
        # TODO: Do we need to copy initInputs/initOutputs?
        newInst._buildGraphs(
            quantumDict,
            _quantumToNodeId={n.quantum: n.nodeId for n in self},
            metadata=self._metadata,
            pruneRefs=refs,
            universe=self._universe,
            globalInitOutputs=self._globalInitOutputRefs,
            registryDatasetTypes=self._registryDatasetTypes,
        )
        return newInst
413 def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode:
414 """Lookup a `QuantumNode` from an id associated with the node.
416 Parameters
417 ----------
418 nodeId : `NodeId`
419 The number associated with a node
421 Returns
422 -------
423 node : `QuantumNode`
424 The node corresponding with input number
426 Raises
427 ------
428 KeyError
429 Raised if the requested nodeId is not in the graph.
430 """
431 return self._nodeIdMap[nodeId]
433 def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]:
434 """Return all the `~lsst.daf.butler.Quantum` associated with a
435 `TaskDef`.
437 Parameters
438 ----------
439 taskDef : `TaskDef`
440 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
441 queried.
443 Returns
444 -------
445 quanta : `frozenset` of `~lsst.daf.butler.Quantum`
446 The `set` of `~lsst.daf.butler.Quantum` that is associated with the
447 specified `TaskDef`.
448 """
449 return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ()))
451 def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int:
452 """Return the number of `~lsst.daf.butler.Quantum` associated with
453 a `TaskDef`.
455 Parameters
456 ----------
457 taskDef : `TaskDef`
458 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
459 queried.
461 Returns
462 -------
463 count : `int`
464 The number of `~lsst.daf.butler.Quantum` that are associated with
465 the specified `TaskDef`.
466 """
467 return len(self._taskToQuantumNode.get(taskDef, ()))
469 def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]:
470 r"""Return all the `QuantumNode`\s associated with a `TaskDef`.
472 Parameters
473 ----------
474 taskDef : `TaskDef`
475 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
476 queried.
478 Returns
479 -------
480 nodes : `frozenset` [ `QuantumNode` ]
481 A `frozenset` of `QuantumNode` that is associated with the
482 specified `TaskDef`.
483 """
484 return frozenset(self._taskToQuantumNode[taskDef])
486 def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
487 """Find all tasks that have the specified dataset type name as an
488 input.
490 Parameters
491 ----------
492 datasetTypeName : `str`
493 A string representing the name of a dataset type to be queried,
494 can also accept a `DatasetTypeName` which is a `~typing.NewType` of
495 `str` for type safety in static type checking.
497 Returns
498 -------
499 tasks : iterable of `TaskDef`
500 `TaskDef` objects that have the specified `DatasetTypeName` as an
501 input, list will be empty if no tasks use specified
502 `DatasetTypeName` as an input.
504 Raises
505 ------
506 KeyError
507 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
508 """
509 return (c for c in self._datasetDict.getConsumers(datasetTypeName))
511 def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None:
512 """Find all tasks that have the specified dataset type name as an
513 output.
515 Parameters
516 ----------
517 datasetTypeName : `str`
518 A string representing the name of a dataset type to be queried,
519 can also accept a `DatasetTypeName` which is a `~typing.NewType` of
520 `str` for type safety in static type checking.
522 Returns
523 -------
524 result : `TaskDef` or `None`
525 `TaskDef` that outputs `DatasetTypeName` as an output or `None` if
526 none of the tasks produce this `DatasetTypeName`.
528 Raises
529 ------
530 KeyError
531 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
532 """
533 return self._datasetDict.getProducer(datasetTypeName)
535 def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
536 """Find all tasks that are associated with the specified dataset type
537 name.
539 Parameters
540 ----------
541 datasetTypeName : `str`
542 A string representing the name of a dataset type to be queried,
543 can also accept a `DatasetTypeName` which is a `~typing.NewType` of
544 `str` for type safety in static type checking.
546 Returns
547 -------
548 result : iterable of `TaskDef`
549 `TaskDef` objects that are associated with the specified
550 `DatasetTypeName`.
552 Raises
553 ------
554 KeyError
555 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`.
556 """
557 return self._datasetDict.getAll(datasetTypeName)
559 def findTaskDefByName(self, taskName: str) -> list[TaskDef]:
560 """Determine which `TaskDef` objects in this graph are associated
561 with a `str` representing a task name (looks at the ``taskName``
562 property of `TaskDef` objects).
564 Returns a list of `TaskDef` objects as a `PipelineTask` may appear
565 multiple times in a graph with different labels.
567 Parameters
568 ----------
569 taskName : `str`
570 Name of a task to search for.
572 Returns
573 -------
574 result : `list` of `TaskDef`
575 List of the `TaskDef` objects that have the name specified.
576 Multiple values are returned in the case that a task is used
577 multiple times with different labels.
578 """
579 results = []
580 for task in self._taskToQuantumNode:
581 split = task.taskName.split(".")
582 if split[-1] == taskName:
583 results.append(task)
584 return results
586 def findTaskDefByLabel(self, label: str) -> TaskDef | None:
587 """Determine which `TaskDef` objects in this graph are associated
588 with a `str` representing a tasks label.
590 Parameters
591 ----------
592 taskName : `str`
593 Name of a task to search for
595 Returns
596 -------
597 result : `TaskDef`
598 `TaskDef` objects that has the specified label.
599 """
600 for task in self._taskToQuantumNode:
601 if label == task.label:
602 return task
603 return None
605 def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]:
606 r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified
607 `DatasetTypeName`.
609 Parameters
610 ----------
611 datasetTypeName : `str`
612 The name of the dataset type to search for as a string,
613 can also accept a `DatasetTypeName` which is a `~typing.NewType` of
614 `str` for type safety in static type checking.
616 Returns
617 -------
618 result : `set` of `QuantumNode` objects
619 A `set` of `QuantumNode`\s that contain specified
620 `DatasetTypeName`.
622 Raises
623 ------
624 KeyError
625 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`
627 """
628 tasks = self._datasetDict.getAll(datasetTypeName)
629 result: set[Quantum] = set()
630 result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task))
631 return result
633 def checkQuantumInGraph(self, quantum: Quantum) -> bool:
634 """Check if specified quantum appears in the graph as part of a node.
636 Parameters
637 ----------
638 quantum : `lsst.daf.butler.Quantum`
639 The quantum to search for.
641 Returns
642 -------
643 in_graph : `bool`
644 The result of searching for the quantum.
645 """
646 return any(quantum == node.quantum for node in self)
    def writeDotGraph(self, output: str | io.BufferedIOBase) -> None:
        """Write out the graph as a dot graph.

        Parameters
        ----------
        output : `str` or `io.BufferedIOBase`
            Either a filesystem path to write to, or a file handle object.
        """
        # Delegates to networkx's nx_agraph dot writer on the quanta graph.
        write_dot(self._connectedQuanta, output)
658 def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T:
659 """Create a new graph object that contains the subset of the nodes
660 specified as input. Node number is preserved.
662 Parameters
663 ----------
664 nodes : `QuantumNode` or iterable of `QuantumNode`
665 Nodes from which to create subset.
667 Returns
668 -------
669 graph : instance of graph type
670 An instance of the type from which the subset was created.
671 """
672 if not isinstance(nodes, Iterable):
673 nodes = (nodes,)
674 quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes
675 quantumMap = defaultdict(set)
677 dataset_type_names: set[str] = set()
678 node: QuantumNode
679 for node in quantumSubgraph:
680 quantumMap[node.taskDef].add(node.quantum)
681 dataset_type_names.update(
682 dstype.name
683 for dstype in chain(
684 node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys()
685 )
686 )
688 # May need to trim dataset types from registryDatasetTypes.
689 for taskDef in quantumMap:
690 if refs := self.initOutputRefs(taskDef):
691 dataset_type_names.update(ref.datasetType.name for ref in refs)
692 dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs)
693 registryDatasetTypes = [
694 dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names
695 ]
697 # convert to standard dict to prevent accidental key insertion
698 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())
699 # Create an empty graph, and then populate it with custom mapping
700 newInst = type(self)({}, universe=self._universe)
701 # TODO: Do we need to copy initInputs/initOutputs?
702 newInst._buildGraphs(
703 quantumDict,
704 _quantumToNodeId={n.quantum: n.nodeId for n in nodes},
705 _buildId=self._buildId,
706 metadata=self._metadata,
707 universe=self._universe,
708 globalInitOutputs=self._globalInitOutputRefs,
709 registryDatasetTypes=registryDatasetTypes,
710 )
711 return newInst
713 def subsetToConnected(self: _T) -> tuple[_T, ...]:
714 """Generate a list of subgraphs where each is connected.
716 Returns
717 -------
718 result : `list` of `QuantumGraph`
719 A list of graphs that are each connected.
720 """
721 return tuple(
722 self.subset(connectedSet)
723 for connectedSet in nx.weakly_connected_components(self._connectedQuanta)
724 )
726 def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
727 """Return a set of `QuantumNode` that are direct inputs to a specified
728 node.
730 Parameters
731 ----------
732 node : `QuantumNode`
733 The node of the graph for which inputs are to be determined.
735 Returns
736 -------
737 inputs : `set` of `QuantumNode`
738 All the nodes that are direct inputs to specified node.
739 """
740 return set(self._connectedQuanta.predecessors(node))
742 def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
743 """Return a set of `QuantumNode` that are direct outputs of a specified
744 node.
746 Parameters
747 ----------
748 node : `QuantumNode`
749 The node of the graph for which outputs are to be determined.
751 Returns
752 -------
753 outputs : `set` of `QuantumNode`
754 All the nodes that are direct outputs to specified node.
755 """
756 return set(self._connectedQuanta.successors(node))
758 def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
759 """Return a graph of `QuantumNode` that are direct inputs and outputs
760 of a specified node.
762 Parameters
763 ----------
764 node : `QuantumNode`
765 The node of the graph for which connected nodes are to be
766 determined.
768 Returns
769 -------
770 graph : graph of `QuantumNode`
771 All the nodes that are directly connected to specified node.
772 """
773 nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node))
774 nodes.add(node)
775 return self.subset(nodes)
777 def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
778 """Return a graph of the specified node and all the ancestor nodes
779 directly reachable by walking edges.
781 Parameters
782 ----------
783 node : `QuantumNode`
784 The node for which all ancestors are to be determined
786 Returns
787 -------
788 ancestors : graph of `QuantumNode`
789 Graph of node and all of its ancestors.
790 """
791 predecessorNodes = nx.ancestors(self._connectedQuanta, node)
792 predecessorNodes.add(node)
793 return self.subset(predecessorNodes)
795 def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]:
796 """Check a graph for the presense of cycles and returns the edges of
797 any cycles found, or an empty list if there is no cycle.
799 Returns
800 -------
801 result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ]
802 A list of any graph edges that form a cycle, or an empty list if
803 there is no cycle. Empty list to so support if graph.find_cycle()
804 syntax as an empty list is falsy.
805 """
806 try:
807 return nx.find_cycle(self._connectedQuanta)
808 except nx.NetworkXNoCycle:
809 return []
811 def saveUri(self, uri: ResourcePathExpression) -> None:
812 """Save `QuantumGraph` to the specified URI.
814 Parameters
815 ----------
816 uri : convertible to `~lsst.resources.ResourcePath`
817 URI to where the graph should be saved.
818 """
819 buffer = self._buildSaveObject()
820 path = ResourcePath(uri)
821 if path.getExtension() not in (".qgraph"):
822 raise TypeError(f"Can currently only save a graph in qgraph format not {uri}")
823 path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes
825 @property
826 def metadata(self) -> MappingProxyType[str, Any] | None:
827 """Extra data carried with the graph (mapping [`str`] or `None`).
829 The mapping is a dynamic view of this object's metadata. Values should
830 be able to be serialized in JSON.
831 """
832 if self._metadata is None:
833 return None
834 return MappingProxyType(self._metadata)
836 def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
837 """Return DatasetRefs for a given task InitInputs.
839 Parameters
840 ----------
841 taskDef : `TaskDef`
842 Task definition structure.
844 Returns
845 -------
846 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
847 DatasetRef for the task InitInput, can be `None`. This can return
848 either resolved or non-resolved reference.
849 """
850 return self._initInputRefs.get(taskDef)
852 def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
853 """Return DatasetRefs for a given task InitOutputs.
855 Parameters
856 ----------
857 taskDef : `TaskDef`
858 Task definition structure.
860 Returns
861 -------
862 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
863 DatasetRefs for the task InitOutput, can be `None`. This can return
864 either resolved or non-resolved reference. Resolved reference will
865 match Quantum's initInputs if this is an intermediate dataset type.
866 """
867 return self._initOutputRefs.get(taskDef)
869 def globalInitOutputRefs(self) -> list[DatasetRef]:
870 """Return DatasetRefs for global InitOutputs.
872 Returns
873 -------
874 refs : `list` [ `~lsst.daf.butler.DatasetRef` ]
875 DatasetRefs for global InitOutputs.
876 """
877 return self._globalInitOutputRefs
879 def registryDatasetTypes(self) -> list[DatasetType]:
880 """Return dataset types used by this graph, their definitions match
881 dataset types from registry.
883 Returns
884 -------
885 refs : `list` [ `~lsst.daf.butler.DatasetType` ]
886 Dataset types for this graph.
887 """
888 return self._registryDatasetTypes
890 @classmethod
891 def loadUri(
892 cls,
893 uri: ResourcePathExpression,
894 universe: DimensionUniverse | None = None,
895 nodes: Iterable[uuid.UUID] | None = None,
896 graphID: BuildId | None = None,
897 minimumVersion: int = 3,
898 ) -> QuantumGraph:
899 """Read `QuantumGraph` from a URI.
901 Parameters
902 ----------
903 uri : convertible to `~lsst.resources.ResourcePath`
904 URI from where to load the graph.
905 universe : `~lsst.daf.butler.DimensionUniverse`, optional
906 If `None` it is loaded from the `QuantumGraph`
907 saved structure. If supplied, the
908 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph`
909 will be validated against the supplied argument for compatibility.
910 nodes : iterable of `uuid.UUID` or `None`
911 UUIDs that correspond to nodes in the graph. If specified, only
912 these nodes will be loaded. Defaults to None, in which case all
913 nodes will be loaded.
914 graphID : `str` or `None`
915 If specified this ID is verified against the loaded graph prior to
916 loading any Nodes. This defaults to None in which case no
917 validation is done.
918 minimumVersion : `int`
919 Minimum version of a save file to load. Set to -1 to load all
920 versions. Older versions may need to be loaded, and re-saved
921 to upgrade them to the latest format before they can be used in
922 production.
924 Returns
925 -------
926 graph : `QuantumGraph`
927 Resulting QuantumGraph instance.
929 Raises
930 ------
931 TypeError
932 Raised if file contains instance of a type other than
933 `QuantumGraph`.
934 ValueError
935 Raised if one or more of the nodes requested is not in the
936 `QuantumGraph` or if graphID parameter does not match the graph
937 being loaded or if the supplied uri does not point at a valid
938 `QuantumGraph` save file.
939 RuntimeError
940 Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not
941 compatible with the `~lsst.daf.butler.DimensionUniverse` saved in
942 the graph.
943 """
944 uri = ResourcePath(uri)
945 if uri.getExtension() in {".qgraph"}:
946 with LoadHelper(uri, minimumVersion) as loader:
947 qgraph = loader.load(universe, nodes, graphID)
948 else:
949 raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}")
950 if not isinstance(qgraph, QuantumGraph):
951 raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}")
952 return qgraph
954 @classmethod
955 def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None:
956 """Read the header of a `QuantumGraph` pointed to by the uri parameter
957 and return it as a string.
959 Parameters
960 ----------
961 uri : convertible to `~lsst.resources.ResourcePath`
962 The location of the `QuantumGraph` to load. If the argument is a
963 string, it must correspond to a valid
964 `~lsst.resources.ResourcePath` path.
965 minimumVersion : `int`
966 Minimum version of a save file to load. Set to -1 to load all
967 versions. Older versions may need to be loaded, and re-saved
968 to upgrade them to the latest format before they can be used in
969 production.
971 Returns
972 -------
973 header : `str` or `None`
974 The header associated with the specified `QuantumGraph` it there is
975 one, else `None`.
977 Raises
978 ------
979 ValueError
980 Raised if the extension of the file specified by uri is not a
981 `QuantumGraph` extension.
982 """
983 uri = ResourcePath(uri)
984 if uri.getExtension() in {".qgraph"}:
985 return LoadHelper(uri, minimumVersion).readHeader()
986 else:
987 raise ValueError("Only know how to handle files saved as `.qgraph`")
989 def buildAndPrintHeader(self) -> None:
990 """Create a header that would be used in a save of this object and
991 prints it out to standard out.
992 """
993 _, header = self._buildSaveObject(returnHeader=True)
994 print(json.dumps(header))
996 def save(self, file: BinaryIO) -> None:
997 """Save QuantumGraph to a file.
999 Parameters
1000 ----------
1001 file : `io.BufferedIOBase`
1002 File to write data open in binary mode.
1003 """
1004 buffer = self._buildSaveObject()
1005 file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes
    def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
        """Serialize this graph into an in-memory byte buffer.

        The buffer layout is: ``MAGIC_BYTES``, the packed save version, the
        packed length of the compressed header, the lzma-compressed JSON
        header, then one lzma-compressed JSON blob per task definition
        followed by one per quantum node.  The header records the byte
        offsets of every blob so loaders can seek to individual tasks or
        nodes without reading everything.

        Parameters
        ----------
        returnHeader : `bool`, optional
            If `True`, also return the uncompressed header mapping that was
            embedded in the buffer.

        Returns
        -------
        buffer : `bytearray`
            The serialized graph.
        headerData : `dict`
            The header mapping prior to compression; only returned when
            ``returnHeader`` is `True`.
        """
        # Containers for the compressed per-task and per-node payloads; a
        # deque so the final write loop can drain it from the left to keep
        # peak memory down.
        jsonData: deque[bytes] = deque()
        # node map is a list because json does not accept mapping keys that
        # are not strings, so we store a list of key, value pairs that will
        # be converted to a mapping on load
        nodeMap = []
        taskDefMap = {}
        headerData: dict[str, Any] = {}

        # Store the QuantumGraph BuildId, this will allow validating BuildIds
        # at load time, prior to loading any QuantumNodes. Name chosen for
        # unlikely conflicts.
        headerData["GraphBuildID"] = self.graphID
        headerData["Metadata"] = self._metadata

        # Store the universe this graph was created with
        universeConfig = self._universe.dimensionConfig
        headerData["universe"] = universeConfig.toDict()

        # Running byte offset into the payload region; used to record the
        # (start, end) span of every compressed blob.
        count = 0
        # serialize out the task Defs recording the start and end bytes of
        # each taskDef
        inverseLookup = self._datasetDict.inverse
        taskDef: TaskDef
        # sort by task label to ensure serialization happens in the same order
        for taskDef in self.taskGraph:
            # compressing has very little impact on saving or load time, but
            # a large impact on on-disk size, so it is worth doing
            taskDescription: dict[str, Any] = {}
            # save the fully qualified name.
            taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
            # save the config as a text stream that will be un-persisted on
            # the other end
            stream = io.StringIO()
            taskDef.config.saveToStream(stream)
            taskDescription["config"] = stream.getvalue()
            taskDescription["label"] = taskDef.label
            # Init input/output refs are optional per task; serialize each as
            # a JSON string when present.
            if (refs := self._initInputRefs.get(taskDef)) is not None:
                taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
            if (refs := self._initOutputRefs.get(taskDef)) is not None:
                taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

            inputs = []
            outputs = []

            # Determine the connection between all of tasks and save that in
            # the header as a list of connections and edges in each task
            # this will help in un-persisting, and possibly in a "quick view"
            # method that does not require everything to be un-persisted
            #
            # Typing returns can't be parameter dependent
            for connection in inverseLookup[taskDef]:  # type: ignore
                consumers = self._datasetDict.getConsumers(connection)
                producer = self._datasetDict.getProducer(connection)
                if taskDef in consumers:
                    # This checks if the task consumes the connection directly
                    # from the datastore or it is produced by another task
                    producerLabel = producer.label if producer is not None else "datastore"
                    inputs.append((producerLabel, connection))
                elif taskDef not in consumers and producer is taskDef:
                    # If there are no consumers for this tasks produced
                    # connection, the output will be said to be the datastore
                    # in which case the for loop will be a zero length loop
                    if not consumers:
                        outputs.append(("datastore", connection))
                    for td in consumers:
                        outputs.append((td.label, connection))

            # dump to json string, and encode that string to bytes and then
            # compress those bytes
            dump = lzma.compress(json.dumps(taskDescription).encode())
            # record the sizing and relation information
            taskDefMap[taskDef.label] = {
                "bytes": (count, count + len(dump)),
                "inputs": inputs,
                "outputs": outputs,
            }
            count += len(dump)
            jsonData.append(dump)

        headerData["TaskDefs"] = taskDefMap

        # serialize the nodes, recording the start and end bytes of each node
        dimAccumulator = DimensionRecordsAccumulator()
        for node in self:
            # compressing has very little impact on saving or load time, but
            # a large impact on on-disk size, so it is worth doing
            simpleNode = node.to_simple(accumulator=dimAccumulator)

            dump = lzma.compress(simpleNode.json().encode())
            jsonData.append(dump)
            nodeMap.append(
                (
                    str(node.nodeId),
                    {
                        "bytes": (count, count + len(dump)),
                        "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                        "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                    },
                )
            )
            count += len(dump)

        # Dimension records accumulated (and deduplicated) by the
        # to_simple() calls above.
        headerData["DimensionRecords"] = {
            key: value.model_dump()
            for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
        }

        # need to serialize this as a series of key,value tuples because of
        # a limitation on how json can't do anything but strings as keys
        headerData["Nodes"] = nodeMap

        if self._globalInitOutputRefs:
            headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

        if self._registryDatasetTypes:
            headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

        # dump the headerData to json and compress it
        header_encode = lzma.compress(json.dumps(headerData).encode())

        # pack the save-format version number so loaders can select the
        # matching deserializer
        save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

        # pack the length of the compressed header using the format string
        # defined by the current save version's deserializer
        fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
        map_lengths = struct.pack(fmt_string, len(header_encode))

        # write each component of the save out in a deterministic order
        buffer = bytearray()
        buffer.extend(MAGIC_BYTES)
        buffer.extend(save_bytes)
        buffer.extend(map_lengths)
        buffer.extend(header_encode)
        # Iterate over the length of jsonData, and for each element pop the
        # leftmost element off the deque and write it out. This is to save
        # memory, as the memory is added to the buffer object, it is removed
        # from the container.
        #
        # Only this section needs to worry about memory pressure because
        # everything else written to the buffer prior to this data is
        # only on the order of kilobytes to low numbers of megabytes.
        while jsonData:
            buffer.extend(jsonData.popleft())
        if returnHeader:
            return buffer, headerData
        else:
            return buffer
1158 @classmethod
1159 def load(
1160 cls,
1161 file: BinaryIO,
1162 universe: DimensionUniverse | None = None,
1163 nodes: Iterable[uuid.UUID] | None = None,
1164 graphID: BuildId | None = None,
1165 minimumVersion: int = 3,
1166 ) -> QuantumGraph:
1167 """Read `QuantumGraph` from a file that was made by `save`.
1169 Parameters
1170 ----------
1171 file : `io.IO` of bytes
1172 File with data open in binary mode.
1173 universe : `~lsst.daf.butler.DimensionUniverse`, optional
1174 If `None` it is loaded from the `QuantumGraph`
1175 saved structure. If supplied, the
1176 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph`
1177 will be validated against the supplied argument for compatibility.
1178 nodes : iterable of `uuid.UUID` or `None`
1179 UUIDs that correspond to nodes in the graph. If specified, only
1180 these nodes will be loaded. Defaults to None, in which case all
1181 nodes will be loaded.
1182 graphID : `str` or `None`
1183 If specified this ID is verified against the loaded graph prior to
1184 loading any Nodes. This defaults to None in which case no
1185 validation is done.
1186 minimumVersion : `int`
1187 Minimum version of a save file to load. Set to -1 to load all
1188 versions. Older versions may need to be loaded, and re-saved
1189 to upgrade them to the latest format before they can be used in
1190 production.
1192 Returns
1193 -------
1194 graph : `QuantumGraph`
1195 Resulting QuantumGraph instance.
1197 Raises
1198 ------
1199 TypeError
1200 Raised if data contains instance of a type other than
1201 `QuantumGraph`.
1202 ValueError
1203 Raised if one or more of the nodes requested is not in the
1204 `QuantumGraph` or if graphID parameter does not match the graph
1205 being loaded or if the supplied uri does not point at a valid
1206 `QuantumGraph` save file.
1207 """
1208 with LoadHelper(file, minimumVersion) as loader:
1209 qgraph = loader.load(universe, nodes, graphID)
1210 if not isinstance(qgraph, QuantumGraph):
1211 raise TypeError(f"QuantumGraph file contains unexpected object type: {type(qgraph)}")
1212 return qgraph
1214 def iterTaskGraph(self) -> Generator[TaskDef, None, None]:
1215 """Iterate over the `taskGraph` attribute in topological order
1217 Yields
1218 ------
1219 taskDef : `TaskDef`
1220 `TaskDef` objects in topological order
1221 """
1222 yield from nx.topological_sort(self.taskGraph)
    def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None:
        """Change output run and dataset ID for each output dataset.

        Parameters
        ----------
        run : `str`
            New output run name.
        metadata_key : `str` or `None`
            Specifies metadata key corresponding to output run name to update
            with new run name. If `None` or if metadata is missing it is not
            updated. If metadata is present but key is missing, it will be
            added.
        update_graph_id : `bool`, optional
            If `True` then also update graph ID with a new unique value.
        """
        # Maps each original dataset ID to the new ID assigned when its
        # output ref was moved to the new run; filled by the first pass and
        # consumed by the second pass to rewrite matching intermediates.
        dataset_id_map: dict[DatasetId, DatasetId] = {}

        def _update_output_refs(
            refs: Iterable[DatasetRef], run: str, dataset_id_map: MutableMapping[DatasetId, DatasetId]
        ) -> Iterator[DatasetRef]:
            """Update a collection of `~lsst.daf.butler.DatasetRef` with new
            run and dataset IDs, recording each old ID -> new ID pair in
            ``dataset_id_map``.
            """
            for ref in refs:
                # NOTE(review): the old->new mapping below assumes
                # replace(run=...) yields a ref with a different dataset ID
                # -- confirm against DatasetRef.replace documentation.
                new_ref = ref.replace(run=run)
                dataset_id_map[ref.id] = new_ref.id
                yield new_ref

        def _update_intermediate_refs(
            refs: Iterable[DatasetRef], run: str, dataset_id_map: Mapping[DatasetId, DatasetId]
        ) -> Iterator[DatasetRef]:
            """Update intermediate references with new run and IDs. Only the
            references that appear in ``dataset_id_map`` are updated, others
            are returned unchanged.
            """
            for ref in refs:
                if dataset_id := dataset_id_map.get(ref.id):
                    ref = ref.replace(run=run, id=dataset_id)
                yield ref

        # Replace quantum output refs first.
        for node in self._connectedQuanta:
            quantum = node.quantum
            outputs = {
                dataset_type: tuple(_update_output_refs(refs, run, dataset_id_map))
                for dataset_type, refs in quantum.outputs.items()
            }
            # Build a new Quantum that differs only in its outputs.
            updated_quantum = Quantum(
                taskName=quantum.taskName,
                dataId=quantum.dataId,
                initInputs=quantum.initInputs,
                inputs=quantum.inputs,
                outputs=outputs,
                datastore_records=quantum.datastore_records,
            )
            node._replace_quantum(updated_quantum)

        # Init-outputs and global init-outputs are outputs too; updating
        # them here also contributes entries to dataset_id_map.
        self._initOutputRefs = {
            task_def: list(_update_output_refs(refs, run, dataset_id_map))
            for task_def, refs in self._initOutputRefs.items()
        }
        self._globalInitOutputRefs = list(
            _update_output_refs(self._globalInitOutputRefs, run, dataset_id_map)
        )

        # Update all intermediates from their matching outputs.
        for node in self._connectedQuanta:
            quantum = node.quantum
            inputs = {
                dataset_type: tuple(_update_intermediate_refs(refs, run, dataset_id_map))
                for dataset_type, refs in quantum.inputs.items()
            }
            initInputs = list(_update_intermediate_refs(quantum.initInputs.values(), run, dataset_id_map))

            # Build a new Quantum that differs only in its (init)inputs.
            updated_quantum = Quantum(
                taskName=quantum.taskName,
                dataId=quantum.dataId,
                initInputs=initInputs,
                inputs=inputs,
                outputs=quantum.outputs,
                datastore_records=quantum.datastore_records,
            )
            node._replace_quantum(updated_quantum)

        self._initInputRefs = {
            task_def: list(_update_intermediate_refs(refs, run, dataset_id_map))
            for task_def, refs in self._initInputRefs.items()
        }

        if update_graph_id:
            # New unique build ID derived from current time and process ID.
            self._buildId = BuildId(f"{time.time()}-{os.getpid()}")

        # Update metadata if present.
        if self._metadata is not None and metadata_key is not None:
            metadata = dict(self._metadata)
            metadata[metadata_key] = run
            self._metadata = metadata
1322 @property
1323 def graphID(self) -> BuildId:
1324 """The ID generated by the graph at construction time (`str`)."""
1325 return self._buildId
1327 @property
1328 def universe(self) -> DimensionUniverse:
1329 """Dimension universe associated with this graph
1330 (`~lsst.daf.butler.DimensionUniverse`).
1331 """
1332 return self._universe
1334 def __iter__(self) -> Generator[QuantumNode, None, None]:
1335 yield from nx.topological_sort(self._connectedQuanta)
1337 def __len__(self) -> int:
1338 return self._count
1340 def __contains__(self, node: QuantumNode) -> bool:
1341 return self._connectedQuanta.has_node(node)
1343 def __getstate__(self) -> dict:
1344 """Store a compact form of the graph as a list of graph nodes, and a
1345 tuple of task labels and task configs. The full graph can be
1346 reconstructed with this information, and it preserves the ordering of
1347 the graph nodes.
1348 """
1349 universe: DimensionUniverse | None = None
1350 for node in self:
1351 dId = node.quantum.dataId
1352 if dId is None:
1353 continue
1354 universe = dId.graph.universe
1355 return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe}
1357 def __setstate__(self, state: dict) -> None:
1358 """Reconstructs the state of the graph from the information persisted
1359 in getstate.
1360 """
1361 buffer = io.BytesIO(state["reduced"])
1362 with LoadHelper(buffer, minimumVersion=3) as loader:
1363 qgraph = loader.load(state["universe"], graphID=state["graphId"])
1365 self._metadata = qgraph._metadata
1366 self._buildId = qgraph._buildId
1367 self._datasetDict = qgraph._datasetDict
1368 self._nodeIdMap = qgraph._nodeIdMap
1369 self._count = len(qgraph)
1370 self._taskToQuantumNode = qgraph._taskToQuantumNode
1371 self._taskGraph = qgraph._taskGraph
1372 self._connectedQuanta = qgraph._connectedQuanta
1373 self._initInputRefs = qgraph._initInputRefs
1374 self._initOutputRefs = qgraph._initOutputRefs
1376 def __eq__(self, other: object) -> bool:
1377 if not isinstance(other, QuantumGraph):
1378 return False
1379 if len(self) != len(other):
1380 return False
1381 for node in self:
1382 if node not in other:
1383 return False
1384 if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node):
1385 return False
1386 if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node):
1387 return False
1388 if set(self.allDatasetTypes) != set(other.allDatasetTypes):
1389 return False
1390 return set(self.taskGraph) == set(other.taskGraph)