Coverage for python/lsst/pipe/base/graph/graph.py: 20%

390 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QuantumGraph", "IncompatibleGraphError") 

24 

25import io 

26import json 

27import lzma 

28import os 

29import struct 

30import time 

31import uuid 

32from collections import defaultdict, deque 

33from collections.abc import Generator, Iterable, Iterator, Mapping, MutableMapping 

34from itertools import chain 

35from types import MappingProxyType 

36from typing import Any, BinaryIO, TypeVar 

37 

38import networkx as nx 

39from lsst.daf.butler import ( 

40 DatasetId, 

41 DatasetRef, 

42 DatasetType, 

43 DimensionRecordsAccumulator, 

44 DimensionUniverse, 

45 PersistenceContextVars, 

46 Quantum, 

47) 

48from lsst.resources import ResourcePath, ResourcePathExpression 

49from lsst.utils.introspection import get_full_type_name 

50from networkx.drawing.nx_agraph import write_dot 

51 

52from ..connections import iterConnections 

53from ..pipeline import TaskDef 

54from ._implDetails import DatasetTypeName, _DatasetTracker 

55from ._loadHelpers import LoadHelper 

56from ._versionDeserializers import DESERIALIZER_MAP 

57from .quantumNode import BuildId, QuantumNode 

58 

# Type variable bound to QuantumGraph so methods like ``subset`` return the
# same (sub)class they were called on.
_T = TypeVar("_T", bound="QuantumGraph")

# modify this constant any time the on disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format for the preamble bytes in a file save
# The base is a big endian encoded unsigned short that is used to hold the
# file format version. This allows reading version bytes and determine which
# loading code should be used for the rest of the file
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream
# Version 2
# A big endian encoded format with an unsigned long long byte stream used to
# indicate the total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# magic bytes that help determine this is a graph save
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"

81 

82 

class IncompatibleGraphError(Exception):
    """Raised when a lookup by NodeId cannot be performed because the node
    identifier is incompatible with this graph.
    """

89 

90 

class QuantumGraph:
    """QuantumGraph is a directed acyclic graph of `QuantumNode` objects.

    This data structure represents a concrete workflow generated from a
    `Pipeline`.

    Parameters
    ----------
    quanta : `~collections.abc.Mapping` [ `TaskDef`, \
            `set` [ `~lsst.daf.butler.Quantum` ] ]
        This maps tasks (and their configs) to the sets of data they are to
        process.
    metadata : `~collections.abc.Mapping` of `str` to primitives, optional
        This is an optional parameter of extra data to carry with the graph.
        Entries in this mapping should be able to be serialized in JSON.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        The dimensions in which quanta can be defined. Need only be provided
        if no quanta have data IDs.
    initInputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitInput dataset refs. Dataset refs can be either
        resolved or non-resolved. Presently the same dataset refs are included
        in each `~lsst.daf.butler.Quantum` for the same task.
    initOutputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitOutput dataset refs. Dataset refs can be
        either resolved or non-resolved. For intermediate resolved refs their
        dataset ID must match ``initInputs`` and Quantum ``initInputs``.
    globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Dataset refs for some global objects produced by pipeline. These
        objects include task configurations and package versions. Typically
        they have an empty DataId, but there is no real restriction on what
        can appear here.
    registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \
            optional
        Dataset types which are used by this graph, their definitions must
        match registry. If registry does not define dataset type yet, then
        it should match one that will be created later.

    Raises
    ------
    ValueError
        Raised if the graph is pruned such that some tasks no longer have
        nodes associated with them.
    """

134 

135 def __init__( 

136 self, 

137 quanta: Mapping[TaskDef, set[Quantum]], 

138 metadata: Mapping[str, Any] | None = None, 

139 universe: DimensionUniverse | None = None, 

140 initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, 

141 initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, 

142 globalInitOutputs: Iterable[DatasetRef] | None = None, 

143 registryDatasetTypes: Iterable[DatasetType] | None = None, 

144 ): 

145 self._buildGraphs( 

146 quanta, 

147 metadata=metadata, 

148 universe=universe, 

149 initInputs=initInputs, 

150 initOutputs=initOutputs, 

151 globalInitOutputs=globalInitOutputs, 

152 registryDatasetTypes=registryDatasetTypes, 

153 ) 

154 

    def _buildGraphs(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        *,
        _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
        _buildId: BuildId | None = None,
        metadata: Mapping[str, Any] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        """Build the graph that is used to store the relation between tasks,
        and the graph that holds the relations between quanta.

        Parameters
        ----------
        quanta : `~collections.abc.Mapping`
            Maps each `TaskDef` to the set of `Quantum` it processes.
        _quantumToNodeId : `~collections.abc.Mapping`, optional
            Private: pre-assigned node UUIDs (used when subsetting so node
            ids are preserved); every quantum must appear as a key.
        _buildId : `BuildId`, optional
            Private: reuse an existing build id instead of minting one.
        metadata, universe, initInputs, initOutputs, globalInitOutputs, \
                registryDatasetTypes
            See the class docstring.

        Raises
        ------
        RuntimeError
            Raised if quanta carry mismatched dimension universes, or if no
            universe can be determined at all.
        ValueError
            Raised if ``_quantumToNodeId`` is given but lacks an entry for
            some quantum.
        """
        self._metadata = metadata
        # A fresh build id is derived from wall-clock time and pid unless the
        # caller (e.g. ``subset``) wants to preserve an existing one.
        self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
        # Data structure used to identify relations between
        # DatasetTypeName -> TaskDef.
        self._datasetDict = _DatasetTracker(createInverse=True)

        # Temporary graph that will have dataset UUIDs (as raw bytes) and
        # QuantumNode objects as nodes; will be collapsed down to just quanta
        # later.
        bipartite_graph = nx.DiGraph()

        self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
        self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
        for taskDef, quantumSet in quanta.items():
            connections = taskDef.connections

            # For each type of connection in the task, add a key to the
            # `_DatasetTracker` for the connections name, with a value of
            # the TaskDef in the appropriate field
            for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
                # Have to handle components in inputs: only the parent
                # dataset type name (before the first ".") is tracked.
                dataset_name, _, _ = inpt.name.partition(".")
                self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)

            for output in iterConnections(connections, ("outputs",)):
                # Have to handle possible components in outputs.
                dataset_name, _, _ = output.name.partition(".")
                self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)

            # For each `Quantum` in the set of all `Quantum` for this task,
            # add a key to the `_DatasetTracker` that is a `DatasetRef` for one
            # of the individual datasets inside the `Quantum`, with a value of
            # a newly created QuantumNode to the appropriate input/output
            # field.
            for quantum in quantumSet:
                # Infer the universe from the first data ID seen, and insist
                # that every other data ID (and any caller-supplied universe)
                # agrees with it.
                if quantum.dataId is not None:
                    if universe is None:
                        universe = quantum.dataId.universe
                    elif universe != quantum.dataId.universe:
                        raise RuntimeError(
                            "Mismatched dimension universes in QuantumGraph construction: "
                            f"{universe} != {quantum.dataId.universe}. "
                        )

                if _quantumToNodeId:
                    if (nodeId := _quantumToNodeId.get(quantum)) is None:
                        raise ValueError(
                            "If _quantuMToNodeNumber is not None, all quanta must have an "
                            "associated value in the mapping"
                        )
                else:
                    nodeId = uuid.uuid4()

                inits = quantum.initInputs.values()
                inputs = quantum.inputs.values()
                value = QuantumNode(quantum, taskDef, nodeId)
                self._taskToQuantumNode[taskDef].add(value)
                self._nodeIdMap[nodeId] = value

                # Quanta are one partition of the bipartite graph
                # (bipartite=0); dataset UUID bytes are the other
                # (bipartite=1).  Edges run dataset -> consuming quantum and
                # producing quantum -> dataset.
                bipartite_graph.add_node(value, bipartite=0)
                for dsRef in chain(inits, inputs):
                    # unfortunately, `Quantum` allows inits to be individual
                    # `DatasetRef`s or an Iterable of such, so there must
                    # be an instance check here
                    if isinstance(dsRef, Iterable):
                        for sub in dsRef:
                            bipartite_graph.add_node(sub.id.bytes, bipartite=1)
                            bipartite_graph.add_edge(sub.id.bytes, value)
                    else:
                        assert isinstance(dsRef, DatasetRef)
                        # Component refs are tracked via their parent
                        # composite so producer/consumer edges line up.
                        if dsRef.isComponent():
                            dsRef = dsRef.makeCompositeRef()
                        bipartite_graph.add_node(dsRef.id.bytes, bipartite=1)
                        bipartite_graph.add_edge(dsRef.id.bytes, value)
                for dsRef in chain.from_iterable(quantum.outputs.values()):
                    bipartite_graph.add_node(dsRef.id.bytes, bipartite=1)
                    bipartite_graph.add_edge(value, dsRef.id.bytes)

        # Dimension universe
        if universe is None:
            raise RuntimeError(
                "Dimension universe or at least one quantum with a data ID "
                "must be provided when constructing a QuantumGraph."
            )
        self._universe = universe

        # Make graph of quanta relations, by projecting out the dataset nodes
        # in the bipartite_graph, leaving just the quanta.
        self._connectedQuanta = nx.algorithms.bipartite.projected_graph(
            bipartite_graph, self._nodeIdMap.values()
        )
        self._count = len(self._connectedQuanta)

        # Graph of task relations, used in various methods
        self._taskGraph = self._datasetDict.makeNetworkXGraph()

        # convert default dict into a regular to prevent accidental key
        # insertion
        self._taskToQuantumNode = dict(self._taskToQuantumNode.items())

        self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._globalInitOutputRefs: list[DatasetRef] = []
        self._registryDatasetTypes: list[DatasetType] = []
        if initInputs is not None:
            self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
        if initOutputs is not None:
            self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
        if globalInitOutputs is not None:
            self._globalInitOutputRefs = list(globalInitOutputs)
        if registryDatasetTypes is not None:
            self._registryDatasetTypes = list(registryDatasetTypes)

283 

284 @property 

285 def taskGraph(self) -> nx.DiGraph: 

286 """A graph representing the relations between the tasks inside 

287 the quantum graph (`networkx.DiGraph`). 

288 """ 

289 return self._taskGraph 

290 

291 @property 

292 def graph(self) -> nx.DiGraph: 

293 """A graph representing the relations between all the `QuantumNode` 

294 objects (`networkx.DiGraph`). 

295 

296 The graph should usually be iterated over, or passed to methods of this 

297 class, but sometimes direct access to the ``networkx`` object may be 

298 helpful. 

299 """ 

300 return self._connectedQuanta 

301 

302 @property 

303 def inputQuanta(self) -> Iterable[QuantumNode]: 

304 """The nodes that are inputs to the graph (iterable [`QuantumNode`]). 

305 

306 These are the nodes that do not depend on any other nodes in the 

307 graph. 

308 """ 

309 return (q for q, n in self._connectedQuanta.in_degree if n == 0) 

310 

311 @property 

312 def outputQuanta(self) -> Iterable[QuantumNode]: 

313 """The nodes that are outputs of the graph (iterable [`QuantumNode`]). 

314 

315 These are the nodes that have no nodes that depend on them in the 

316 graph. 

317 """ 

318 return [q for q, n in self._connectedQuanta.out_degree if n == 0] 

319 

320 @property 

321 def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]: 

322 """All the data set type names that are present in the graph 

323 (`tuple` [`str`]). 

324 

325 These types do not include global init-outputs. 

326 """ 

327 return tuple(self._datasetDict.keys()) 

328 

329 @property 

330 def isConnected(self) -> bool: 

331 """Whether all of the nodes in the graph are connected, ignoring 

332 directionality of connections (`bool`). 

333 """ 

334 return nx.is_weakly_connected(self._connectedQuanta) 

335 

336 def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: 

337 """Lookup a `QuantumNode` from an id associated with the node. 

338 

339 Parameters 

340 ---------- 

341 nodeId : `NodeId` 

342 The number associated with a node 

343 

344 Returns 

345 ------- 

346 node : `QuantumNode` 

347 The node corresponding with input number 

348 

349 Raises 

350 ------ 

351 KeyError 

352 Raised if the requested nodeId is not in the graph. 

353 """ 

354 return self._nodeIdMap[nodeId] 

355 

356 def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]: 

357 """Return all the `~lsst.daf.butler.Quantum` associated with a 

358 `TaskDef`. 

359 

360 Parameters 

361 ---------- 

362 taskDef : `TaskDef` 

363 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

364 queried. 

365 

366 Returns 

367 ------- 

368 quanta : `frozenset` of `~lsst.daf.butler.Quantum` 

369 The `set` of `~lsst.daf.butler.Quantum` that is associated with the 

370 specified `TaskDef`. 

371 """ 

372 return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) 

373 

374 def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: 

375 """Return the number of `~lsst.daf.butler.Quantum` associated with 

376 a `TaskDef`. 

377 

378 Parameters 

379 ---------- 

380 taskDef : `TaskDef` 

381 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

382 queried. 

383 

384 Returns 

385 ------- 

386 count : `int` 

387 The number of `~lsst.daf.butler.Quantum` that are associated with 

388 the specified `TaskDef`. 

389 """ 

390 return len(self._taskToQuantumNode.get(taskDef, ())) 

391 

392 def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]: 

393 r"""Return all the `QuantumNode`\s associated with a `TaskDef`. 

394 

395 Parameters 

396 ---------- 

397 taskDef : `TaskDef` 

398 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

399 queried. 

400 

401 Returns 

402 ------- 

403 nodes : `frozenset` [ `QuantumNode` ] 

404 A `frozenset` of `QuantumNode` that is associated with the 

405 specified `TaskDef`. 

406 """ 

407 return frozenset(self._taskToQuantumNode[taskDef]) 

408 

409 def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

410 """Find all tasks that have the specified dataset type name as an 

411 input. 

412 

413 Parameters 

414 ---------- 

415 datasetTypeName : `str` 

416 A string representing the name of a dataset type to be queried, 

417 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

418 `str` for type safety in static type checking. 

419 

420 Returns 

421 ------- 

422 tasks : iterable of `TaskDef` 

423 `TaskDef` objects that have the specified `DatasetTypeName` as an 

424 input, list will be empty if no tasks use specified 

425 `DatasetTypeName` as an input. 

426 

427 Raises 

428 ------ 

429 KeyError 

430 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

431 """ 

432 return (c for c in self._datasetDict.getConsumers(datasetTypeName)) 

433 

434 def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None: 

435 """Find all tasks that have the specified dataset type name as an 

436 output. 

437 

438 Parameters 

439 ---------- 

440 datasetTypeName : `str` 

441 A string representing the name of a dataset type to be queried, 

442 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

443 `str` for type safety in static type checking. 

444 

445 Returns 

446 ------- 

447 result : `TaskDef` or `None` 

448 `TaskDef` that outputs `DatasetTypeName` as an output or `None` if 

449 none of the tasks produce this `DatasetTypeName`. 

450 

451 Raises 

452 ------ 

453 KeyError 

454 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

455 """ 

456 return self._datasetDict.getProducer(datasetTypeName) 

457 

458 def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

459 """Find all tasks that are associated with the specified dataset type 

460 name. 

461 

462 Parameters 

463 ---------- 

464 datasetTypeName : `str` 

465 A string representing the name of a dataset type to be queried, 

466 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

467 `str` for type safety in static type checking. 

468 

469 Returns 

470 ------- 

471 result : iterable of `TaskDef` 

472 `TaskDef` objects that are associated with the specified 

473 `DatasetTypeName`. 

474 

475 Raises 

476 ------ 

477 KeyError 

478 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

479 """ 

480 return self._datasetDict.getAll(datasetTypeName) 

481 

482 def findTaskDefByName(self, taskName: str) -> list[TaskDef]: 

483 """Determine which `TaskDef` objects in this graph are associated 

484 with a `str` representing a task name (looks at the ``taskName`` 

485 property of `TaskDef` objects). 

486 

487 Returns a list of `TaskDef` objects as a `PipelineTask` may appear 

488 multiple times in a graph with different labels. 

489 

490 Parameters 

491 ---------- 

492 taskName : `str` 

493 Name of a task to search for. 

494 

495 Returns 

496 ------- 

497 result : `list` of `TaskDef` 

498 List of the `TaskDef` objects that have the name specified. 

499 Multiple values are returned in the case that a task is used 

500 multiple times with different labels. 

501 """ 

502 results = [] 

503 for task in self._taskToQuantumNode: 

504 split = task.taskName.split(".") 

505 if split[-1] == taskName: 

506 results.append(task) 

507 return results 

508 

509 def findTaskDefByLabel(self, label: str) -> TaskDef | None: 

510 """Determine which `TaskDef` objects in this graph are associated 

511 with a `str` representing a tasks label. 

512 

513 Parameters 

514 ---------- 

515 taskName : `str` 

516 Name of a task to search for 

517 

518 Returns 

519 ------- 

520 result : `TaskDef` 

521 `TaskDef` objects that has the specified label. 

522 """ 

523 for task in self._taskToQuantumNode: 

524 if label == task.label: 

525 return task 

526 return None 

527 

528 def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]: 

529 r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified 

530 `DatasetTypeName`. 

531 

532 Parameters 

533 ---------- 

534 datasetTypeName : `str` 

535 The name of the dataset type to search for as a string, 

536 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

537 `str` for type safety in static type checking. 

538 

539 Returns 

540 ------- 

541 result : `set` of `QuantumNode` objects 

542 A `set` of `QuantumNode`\s that contain specified 

543 `DatasetTypeName`. 

544 

545 Raises 

546 ------ 

547 KeyError 

548 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

549 

550 """ 

551 tasks = self._datasetDict.getAll(datasetTypeName) 

552 result: set[Quantum] = set() 

553 result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task)) 

554 return result 

555 

556 def checkQuantumInGraph(self, quantum: Quantum) -> bool: 

557 """Check if specified quantum appears in the graph as part of a node. 

558 

559 Parameters 

560 ---------- 

561 quantum : `lsst.daf.butler.Quantum` 

562 The quantum to search for. 

563 

564 Returns 

565 ------- 

566 in_graph : `bool` 

567 The result of searching for the quantum. 

568 """ 

569 return any(quantum == node.quantum for node in self) 

570 

571 def writeDotGraph(self, output: str | io.BufferedIOBase) -> None: 

572 """Write out the graph as a dot graph. 

573 

574 Parameters 

575 ---------- 

576 output : `str` or `io.BufferedIOBase` 

577 Either a filesystem path to write to, or a file handle object. 

578 """ 

579 write_dot(self._connectedQuanta, output) 

580 

581 def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T: 

582 """Create a new graph object that contains the subset of the nodes 

583 specified as input. Node number is preserved. 

584 

585 Parameters 

586 ---------- 

587 nodes : `QuantumNode` or iterable of `QuantumNode` 

588 Nodes from which to create subset. 

589 

590 Returns 

591 ------- 

592 graph : instance of graph type 

593 An instance of the type from which the subset was created. 

594 """ 

595 if not isinstance(nodes, Iterable): 

596 nodes = (nodes,) 

597 quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes 

598 quantumMap = defaultdict(set) 

599 

600 dataset_type_names: set[str] = set() 

601 node: QuantumNode 

602 for node in quantumSubgraph: 

603 quantumMap[node.taskDef].add(node.quantum) 

604 dataset_type_names.update( 

605 dstype.name 

606 for dstype in chain( 

607 node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys() 

608 ) 

609 ) 

610 

611 # May need to trim dataset types from registryDatasetTypes. 

612 for taskDef in quantumMap: 

613 if refs := self.initOutputRefs(taskDef): 

614 dataset_type_names.update(ref.datasetType.name for ref in refs) 

615 dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs) 

616 registryDatasetTypes = [ 

617 dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names 

618 ] 

619 

620 # convert to standard dict to prevent accidental key insertion 

621 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) 

622 # Create an empty graph, and then populate it with custom mapping 

623 newInst = type(self)({}, universe=self._universe) 

624 # TODO: Do we need to copy initInputs/initOutputs? 

625 newInst._buildGraphs( 

626 quantumDict, 

627 _quantumToNodeId={n.quantum: n.nodeId for n in nodes}, 

628 _buildId=self._buildId, 

629 metadata=self._metadata, 

630 universe=self._universe, 

631 globalInitOutputs=self._globalInitOutputRefs, 

632 registryDatasetTypes=registryDatasetTypes, 

633 ) 

634 return newInst 

635 

636 def subsetToConnected(self: _T) -> tuple[_T, ...]: 

637 """Generate a list of subgraphs where each is connected. 

638 

639 Returns 

640 ------- 

641 result : `list` of `QuantumGraph` 

642 A list of graphs that are each connected. 

643 """ 

644 return tuple( 

645 self.subset(connectedSet) 

646 for connectedSet in nx.weakly_connected_components(self._connectedQuanta) 

647 ) 

648 

649 def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

650 """Return a set of `QuantumNode` that are direct inputs to a specified 

651 node. 

652 

653 Parameters 

654 ---------- 

655 node : `QuantumNode` 

656 The node of the graph for which inputs are to be determined. 

657 

658 Returns 

659 ------- 

660 inputs : `set` of `QuantumNode` 

661 All the nodes that are direct inputs to specified node. 

662 """ 

663 return set(self._connectedQuanta.predecessors(node)) 

664 

665 def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

666 """Return a set of `QuantumNode` that are direct outputs of a specified 

667 node. 

668 

669 Parameters 

670 ---------- 

671 node : `QuantumNode` 

672 The node of the graph for which outputs are to be determined. 

673 

674 Returns 

675 ------- 

676 outputs : `set` of `QuantumNode` 

677 All the nodes that are direct outputs to specified node. 

678 """ 

679 return set(self._connectedQuanta.successors(node)) 

680 

681 def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

682 """Return a graph of `QuantumNode` that are direct inputs and outputs 

683 of a specified node. 

684 

685 Parameters 

686 ---------- 

687 node : `QuantumNode` 

688 The node of the graph for which connected nodes are to be 

689 determined. 

690 

691 Returns 

692 ------- 

693 graph : graph of `QuantumNode` 

694 All the nodes that are directly connected to specified node. 

695 """ 

696 nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node)) 

697 nodes.add(node) 

698 return self.subset(nodes) 

699 

700 def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

701 """Return a graph of the specified node and all the ancestor nodes 

702 directly reachable by walking edges. 

703 

704 Parameters 

705 ---------- 

706 node : `QuantumNode` 

707 The node for which all ancestors are to be determined 

708 

709 Returns 

710 ------- 

711 ancestors : graph of `QuantumNode` 

712 Graph of node and all of its ancestors. 

713 """ 

714 predecessorNodes = nx.ancestors(self._connectedQuanta, node) 

715 predecessorNodes.add(node) 

716 return self.subset(predecessorNodes) 

717 

718 def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]: 

719 """Check a graph for the presense of cycles and returns the edges of 

720 any cycles found, or an empty list if there is no cycle. 

721 

722 Returns 

723 ------- 

724 result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ] 

725 A list of any graph edges that form a cycle, or an empty list if 

726 there is no cycle. Empty list to so support if graph.find_cycle() 

727 syntax as an empty list is falsy. 

728 """ 

729 try: 

730 return nx.find_cycle(self._connectedQuanta) 

731 except nx.NetworkXNoCycle: 

732 return [] 

733 

734 def saveUri(self, uri: ResourcePathExpression) -> None: 

735 """Save `QuantumGraph` to the specified URI. 

736 

737 Parameters 

738 ---------- 

739 uri : convertible to `~lsst.resources.ResourcePath` 

740 URI to where the graph should be saved. 

741 """ 

742 buffer = self._buildSaveObject() 

743 path = ResourcePath(uri) 

744 if path.getExtension() not in (".qgraph"): 

745 raise TypeError(f"Can currently only save a graph in qgraph format not {uri}") 

746 path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

747 

748 @property 

749 def metadata(self) -> MappingProxyType[str, Any] | None: 

750 """Extra data carried with the graph (mapping [`str`] or `None`). 

751 

752 The mapping is a dynamic view of this object's metadata. Values should 

753 be able to be serialized in JSON. 

754 """ 

755 if self._metadata is None: 

756 return None 

757 return MappingProxyType(self._metadata) 

758 

759 def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: 

760 """Return DatasetRefs for a given task InitInputs. 

761 

762 Parameters 

763 ---------- 

764 taskDef : `TaskDef` 

765 Task definition structure. 

766 

767 Returns 

768 ------- 

769 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` 

770 DatasetRef for the task InitInput, can be `None`. This can return 

771 either resolved or non-resolved reference. 

772 """ 

773 return self._initInputRefs.get(taskDef) 

774 

775 def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: 

776 """Return DatasetRefs for a given task InitOutputs. 

777 

778 Parameters 

779 ---------- 

780 taskDef : `TaskDef` 

781 Task definition structure. 

782 

783 Returns 

784 ------- 

785 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` 

786 DatasetRefs for the task InitOutput, can be `None`. This can return 

787 either resolved or non-resolved reference. Resolved reference will 

788 match Quantum's initInputs if this is an intermediate dataset type. 

789 """ 

790 return self._initOutputRefs.get(taskDef) 

791 

792 def globalInitOutputRefs(self) -> list[DatasetRef]: 

793 """Return DatasetRefs for global InitOutputs. 

794 

795 Returns 

796 ------- 

797 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] 

798 DatasetRefs for global InitOutputs. 

799 """ 

800 return self._globalInitOutputRefs 

801 

802 def registryDatasetTypes(self) -> list[DatasetType]: 

803 """Return dataset types used by this graph, their definitions match 

804 dataset types from registry. 

805 

806 Returns 

807 ------- 

808 refs : `list` [ `~lsst.daf.butler.DatasetType` ] 

809 Dataset types for this graph. 

810 """ 

811 return self._registryDatasetTypes 

812 

813 @classmethod 

814 def loadUri( 

815 cls, 

816 uri: ResourcePathExpression, 

817 universe: DimensionUniverse | None = None, 

818 nodes: Iterable[uuid.UUID] | None = None, 

819 graphID: BuildId | None = None, 

820 minimumVersion: int = 3, 

821 ) -> QuantumGraph: 

822 """Read `QuantumGraph` from a URI. 

823 

824 Parameters 

825 ---------- 

826 uri : convertible to `~lsst.resources.ResourcePath` 

827 URI from where to load the graph. 

828 universe : `~lsst.daf.butler.DimensionUniverse`, optional 

829 If `None` it is loaded from the `QuantumGraph` 

830 saved structure. If supplied, the 

831 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` 

832 will be validated against the supplied argument for compatibility. 

833 nodes : iterable of `uuid.UUID` or `None` 

834 UUIDs that correspond to nodes in the graph. If specified, only 

835 these nodes will be loaded. Defaults to None, in which case all 

836 nodes will be loaded. 

837 graphID : `str` or `None` 

838 If specified this ID is verified against the loaded graph prior to 

839 loading any Nodes. This defaults to None in which case no 

840 validation is done. 

841 minimumVersion : `int` 

842 Minimum version of a save file to load. Set to -1 to load all 

843 versions. Older versions may need to be loaded, and re-saved 

844 to upgrade them to the latest format before they can be used in 

845 production. 

846 

847 Returns 

848 ------- 

849 graph : `QuantumGraph` 

850 Resulting QuantumGraph instance. 

851 

852 Raises 

853 ------ 

854 TypeError 

855 Raised if file contains instance of a type other than 

856 `QuantumGraph`. 

857 ValueError 

858 Raised if one or more of the nodes requested is not in the 

859 `QuantumGraph` or if graphID parameter does not match the graph 

860 being loaded or if the supplied uri does not point at a valid 

861 `QuantumGraph` save file. 

862 RuntimeError 

863 Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not 

864 compatible with the `~lsst.daf.butler.DimensionUniverse` saved in 

865 the graph. 

866 """ 

867 uri = ResourcePath(uri) 

868 if uri.getExtension() in {".qgraph"}: 

869 with LoadHelper(uri, minimumVersion) as loader: 

870 qgraph = loader.load(universe, nodes, graphID) 

871 else: 

872 raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}") 

873 if not isinstance(qgraph, QuantumGraph): 

874 raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}") 

875 return qgraph 

876 

877 @classmethod 

878 def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None: 

879 """Read the header of a `QuantumGraph` pointed to by the uri parameter 

880 and return it as a string. 

881 

882 Parameters 

883 ---------- 

884 uri : convertible to `~lsst.resources.ResourcePath` 

885 The location of the `QuantumGraph` to load. If the argument is a 

886 string, it must correspond to a valid 

887 `~lsst.resources.ResourcePath` path. 

888 minimumVersion : `int` 

889 Minimum version of a save file to load. Set to -1 to load all 

890 versions. Older versions may need to be loaded, and re-saved 

891 to upgrade them to the latest format before they can be used in 

892 production. 

893 

894 Returns 

895 ------- 

896 header : `str` or `None` 

897 The header associated with the specified `QuantumGraph` it there is 

898 one, else `None`. 

899 

900 Raises 

901 ------ 

902 ValueError 

903 Raised if the extension of the file specified by uri is not a 

904 `QuantumGraph` extension. 

905 """ 

906 uri = ResourcePath(uri) 

907 if uri.getExtension() in {".qgraph"}: 

908 return LoadHelper(uri, minimumVersion).readHeader() 

909 else: 

910 raise ValueError("Only know how to handle files saved as `.qgraph`") 

911 

912 def buildAndPrintHeader(self) -> None: 

913 """Create a header that would be used in a save of this object and 

914 prints it out to standard out. 

915 """ 

916 _, header = self._buildSaveObject(returnHeader=True) 

917 print(json.dumps(header)) 

918 

919 def save(self, file: BinaryIO) -> None: 

920 """Save QuantumGraph to a file. 

921 

922 Parameters 

923 ---------- 

924 file : `io.BufferedIOBase` 

925 File to write data open in binary mode. 

926 """ 

927 buffer = self._buildSaveObject() 

928 file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

929 

930 def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]: 

931 thing = PersistenceContextVars() 

932 result = thing.run(self._buildSaveObjectImpl, returnHeader) 

933 return result 

934 

    def _buildSaveObjectImpl(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
        """Build the binary save representation of this graph.

        The layout is: magic bytes, a packed save-format version, the packed
        length of the lzma-compressed JSON header, the header itself, then
        the concatenated lzma-compressed JSON payloads for each task
        definition followed by each quantum node. The header records the
        (start, end) byte offsets of every payload so individual pieces can
        be loaded without reading the whole file.

        Parameters
        ----------
        returnHeader : `bool`, optional
            If `True`, also return the header mapping alongside the buffer.

        Returns
        -------
        buffer : `bytearray` or `tuple` [`bytearray`, `dict`]
            The serialized graph, plus the header dict when requested.
        """
        # make some containers
        jsonData: deque[bytes] = deque()
        # node map is a list because json does not accept mapping keys that
        # are not strings, so we store a list of key, value pairs that will
        # be converted to a mapping on load
        nodeMap = []
        taskDefMap = {}
        headerData: dict[str, Any] = {}

        # Store the QuantumGraph BuildId; this will allow validating BuildIds
        # at load time, prior to loading any QuantumNodes. Name chosen for
        # unlikely conflicts.
        headerData["GraphBuildID"] = self.graphID
        headerData["Metadata"] = self._metadata

        # Store the universe this graph was created with
        universeConfig = self._universe.dimensionConfig
        headerData["universe"] = universeConfig.toDict()

        # counter for the number of bytes processed thus far; every payload's
        # (start, end) offsets in the header are derived from this running
        # total, so append order below must match offset bookkeeping exactly.
        count = 0
        # serialize out the task Defs recording the start and end bytes of
        # each taskDef
        inverseLookup = self._datasetDict.inverse
        taskDef: TaskDef
        # NOTE(review): comment in the original claimed iteration is sorted
        # by task label, but ``self.taskGraph`` is iterated as-is — confirm
        # whether taskGraph itself guarantees a deterministic order.
        for taskDef in self.taskGraph:
            # compressing has very little impact on saving or load time, but
            # a large impact on disk size, so it is worth doing
            taskDescription: dict[str, Any] = {}
            # save the fully qualified name.
            taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
            # save the config as a text stream that will be un-persisted on
            # the other end
            stream = io.StringIO()
            taskDef.config.saveToStream(stream)
            taskDescription["config"] = stream.getvalue()
            taskDescription["label"] = taskDef.label
            if (refs := self._initInputRefs.get(taskDef)) is not None:
                taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
            if (refs := self._initOutputRefs.get(taskDef)) is not None:
                taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

            inputs = []
            outputs = []

            # Determine the connection between all of tasks and save that in
            # the header as a list of connections and edges in each task
            # this will help in un-persisting, and possibly in a "quick view"
            # method that does not require everything to be un-persisted
            #
            # Typing returns can't be parameter dependent
            for connection in inverseLookup[taskDef]:  # type: ignore
                consumers = self._datasetDict.getConsumers(connection)
                producer = self._datasetDict.getProducer(connection)
                if taskDef in consumers:
                    # This checks if the task consumes the connection directly
                    # from the datastore or it is produced by another task
                    producerLabel = producer.label if producer is not None else "datastore"
                    inputs.append((producerLabel, connection))
                elif taskDef not in consumers and producer is taskDef:
                    # If there are no consumers for this task's produced
                    # connection, the output will be said to be the datastore
                    # in which case the for loop will be a zero length loop
                    if not consumers:
                        outputs.append(("datastore", connection))
                    for td in consumers:
                        outputs.append((td.label, connection))

            # dump to json string, and encode that string to bytes and then
            # compress those bytes
            dump = lzma.compress(json.dumps(taskDescription).encode(), preset=2)
            # record the sizing and relation information
            taskDefMap[taskDef.label] = {
                "bytes": (count, count + len(dump)),
                "inputs": inputs,
                "outputs": outputs,
            }
            count += len(dump)
            jsonData.append(dump)

        headerData["TaskDefs"] = taskDefMap

        # serialize the nodes, recording the start and end bytes of each node
        dimAccumulator = DimensionRecordsAccumulator()
        for node in self:
            # compressing has very little impact on saving or load time, but
            # a large impact on disk size, so it is worth doing
            simpleNode = node.to_simple(accumulator=dimAccumulator)

            dump = lzma.compress(simpleNode.json().encode(), preset=2)
            jsonData.append(dump)
            nodeMap.append(
                (
                    str(node.nodeId),
                    {
                        "bytes": (count, count + len(dump)),
                        "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                        "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                    },
                )
            )
            count += len(dump)

        # Dimension records accumulated while simplifying nodes above are
        # stored once in the header rather than per-node.
        headerData["DimensionRecords"] = {
            key: value.model_dump()
            for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
        }

        # need to serialize this as a series of key,value tuples because of
        # a limitation on how json can't do anything but strings as keys
        headerData["Nodes"] = nodeMap

        if self._globalInitOutputRefs:
            headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

        if self._registryDatasetTypes:
            headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

        # dump the headerData to json
        header_encode = lzma.compress(json.dumps(headerData).encode())

        # record the save format version and the header length as packed
        # binary values so the loader can locate the header
        save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

        fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
        map_lengths = struct.pack(fmt_string, len(header_encode))

        # write each component of the save out in a deterministic order
        buffer = bytearray()
        buffer.extend(MAGIC_BYTES)
        buffer.extend(save_bytes)
        buffer.extend(map_lengths)
        buffer.extend(header_encode)
        # Iterate over the length of jsonData, and for each element pop the
        # leftmost element off the deque and write it out. This is to save
        # memory, as the memory is added to the buffer object, it is removed
        # from the container.
        #
        # Only this section needs to worry about memory pressure because
        # everything else written to the buffer prior to this data is
        # only on the order of kilobytes to low numbers of megabytes.
        while jsonData:
            buffer.extend(jsonData.popleft())
        if returnHeader:
            return buffer, headerData
        else:
            return buffer

1085 

1086 @classmethod 

1087 def load( 

1088 cls, 

1089 file: BinaryIO, 

1090 universe: DimensionUniverse | None = None, 

1091 nodes: Iterable[uuid.UUID] | None = None, 

1092 graphID: BuildId | None = None, 

1093 minimumVersion: int = 3, 

1094 ) -> QuantumGraph: 

1095 """Read `QuantumGraph` from a file that was made by `save`. 

1096 

1097 Parameters 

1098 ---------- 

1099 file : `io.IO` of bytes 

1100 File with data open in binary mode. 

1101 universe : `~lsst.daf.butler.DimensionUniverse`, optional 

1102 If `None` it is loaded from the `QuantumGraph` 

1103 saved structure. If supplied, the 

1104 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` 

1105 will be validated against the supplied argument for compatibility. 

1106 nodes : iterable of `uuid.UUID` or `None` 

1107 UUIDs that correspond to nodes in the graph. If specified, only 

1108 these nodes will be loaded. Defaults to None, in which case all 

1109 nodes will be loaded. 

1110 graphID : `str` or `None` 

1111 If specified this ID is verified against the loaded graph prior to 

1112 loading any Nodes. This defaults to None in which case no 

1113 validation is done. 

1114 minimumVersion : `int` 

1115 Minimum version of a save file to load. Set to -1 to load all 

1116 versions. Older versions may need to be loaded, and re-saved 

1117 to upgrade them to the latest format before they can be used in 

1118 production. 

1119 

1120 Returns 

1121 ------- 

1122 graph : `QuantumGraph` 

1123 Resulting QuantumGraph instance. 

1124 

1125 Raises 

1126 ------ 

1127 TypeError 

1128 Raised if data contains instance of a type other than 

1129 `QuantumGraph`. 

1130 ValueError 

1131 Raised if one or more of the nodes requested is not in the 

1132 `QuantumGraph` or if graphID parameter does not match the graph 

1133 being loaded or if the supplied uri does not point at a valid 

1134 `QuantumGraph` save file. 

1135 """ 

1136 with LoadHelper(file, minimumVersion) as loader: 

1137 qgraph = loader.load(universe, nodes, graphID) 

1138 if not isinstance(qgraph, QuantumGraph): 

1139 raise TypeError(f"QuantumGraph file contains unexpected object type: {type(qgraph)}") 

1140 return qgraph 

1141 

1142 def iterTaskGraph(self) -> Generator[TaskDef, None, None]: 

1143 """Iterate over the `taskGraph` attribute in topological order 

1144 

1145 Yields 

1146 ------ 

1147 taskDef : `TaskDef` 

1148 `TaskDef` objects in topological order 

1149 """ 

1150 yield from nx.topological_sort(self.taskGraph) 

1151 

    def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None:
        """Change output run and dataset ID for each output dataset.

        Parameters
        ----------
        run : `str`
            New output run name.
        metadata_key : `str` or `None`
            Specifies metadata key corresponding to output run name to update
            with new run name. If `None` or if metadata is missing it is not
            updated. If metadata is present but key is missing, it will be
            added.
        update_graph_id : `bool`, optional
            If `True` then also update graph ID with a new unique value.
        """
        # Maps each old dataset ID to the new ID minted for it; filled in by
        # the first (output) pass and consumed by the second (input) pass so
        # intermediates stay consistent across producer/consumer quanta.
        dataset_id_map: dict[DatasetId, DatasetId] = {}

        def _update_output_refs(
            refs: Iterable[DatasetRef], run: str, dataset_id_map: MutableMapping[DatasetId, DatasetId]
        ) -> Iterator[DatasetRef]:
            """Update a collection of `~lsst.daf.butler.DatasetRef` with new
            run and dataset IDs, recording old->new ID pairs in
            ``dataset_id_map`` as a side effect.
            """
            for ref in refs:
                new_ref = ref.replace(run=run)
                dataset_id_map[ref.id] = new_ref.id
                yield new_ref

        def _update_intermediate_refs(
            refs: Iterable[DatasetRef], run: str, dataset_id_map: Mapping[DatasetId, DatasetId]
        ) -> Iterator[DatasetRef]:
            """Update intermediate references with new run and IDs. Only the
            references that appear in ``dataset_id_map`` are updated, others
            are returned unchanged.
            """
            for ref in refs:
                if dataset_id := dataset_id_map.get(ref.id):
                    ref = ref.replace(run=run, id=dataset_id)
                yield ref

        # Replace quantum output refs first.  This pass must complete before
        # any inputs are touched so that dataset_id_map is fully populated.
        for node in self._connectedQuanta:
            quantum = node.quantum
            outputs = {
                dataset_type: tuple(_update_output_refs(refs, run, dataset_id_map))
                for dataset_type, refs in quantum.outputs.items()
            }
            # Quantum is immutable, so build a replacement with new outputs.
            updated_quantum = Quantum(
                taskName=quantum.taskName,
                dataId=quantum.dataId,
                initInputs=quantum.initInputs,
                inputs=quantum.inputs,
                outputs=outputs,
                datastore_records=quantum.datastore_records,
            )
            node._replace_quantum(updated_quantum)

        self._initOutputRefs = {
            task_def: list(_update_output_refs(refs, run, dataset_id_map))
            for task_def, refs in self._initOutputRefs.items()
        }
        self._globalInitOutputRefs = list(
            _update_output_refs(self._globalInitOutputRefs, run, dataset_id_map)
        )

        # Update all intermediates from their matching outputs.  Pure inputs
        # (not produced by any task in this graph) are left untouched.
        for node in self._connectedQuanta:
            quantum = node.quantum
            inputs = {
                dataset_type: tuple(_update_intermediate_refs(refs, run, dataset_id_map))
                for dataset_type, refs in quantum.inputs.items()
            }
            initInputs = list(_update_intermediate_refs(quantum.initInputs.values(), run, dataset_id_map))

            updated_quantum = Quantum(
                taskName=quantum.taskName,
                dataId=quantum.dataId,
                initInputs=initInputs,
                inputs=inputs,
                outputs=quantum.outputs,
                datastore_records=quantum.datastore_records,
            )
            node._replace_quantum(updated_quantum)

        self._initInputRefs = {
            task_def: list(_update_intermediate_refs(refs, run, dataset_id_map))
            for task_def, refs in self._initInputRefs.items()
        }

        if update_graph_id:
            # New BuildId combines wall-clock time and PID for uniqueness.
            self._buildId = BuildId(f"{time.time()}-{os.getpid()}")

        # Update metadata if present.
        if self._metadata is not None and metadata_key is not None:
            metadata = dict(self._metadata)
            metadata[metadata_key] = run
            self._metadata = metadata

1249 

1250 @property 

1251 def graphID(self) -> BuildId: 

1252 """The ID generated by the graph at construction time (`str`).""" 

1253 return self._buildId 

1254 

1255 @property 

1256 def universe(self) -> DimensionUniverse: 

1257 """Dimension universe associated with this graph 

1258 (`~lsst.daf.butler.DimensionUniverse`). 

1259 """ 

1260 return self._universe 

1261 

1262 def __iter__(self) -> Generator[QuantumNode, None, None]: 

1263 yield from nx.topological_sort(self._connectedQuanta) 

1264 

1265 def __len__(self) -> int: 

1266 return self._count 

1267 

1268 def __contains__(self, node: QuantumNode) -> bool: 

1269 return self._connectedQuanta.has_node(node) 

1270 

1271 def __getstate__(self) -> dict: 

1272 """Store a compact form of the graph as a list of graph nodes, and a 

1273 tuple of task labels and task configs. The full graph can be 

1274 reconstructed with this information, and it preserves the ordering of 

1275 the graph nodes. 

1276 """ 

1277 universe: DimensionUniverse | None = None 

1278 for node in self: 

1279 dId = node.quantum.dataId 

1280 if dId is None: 

1281 continue 

1282 universe = dId.graph.universe 

1283 return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe} 

1284 

1285 def __setstate__(self, state: dict) -> None: 

1286 """Reconstructs the state of the graph from the information persisted 

1287 in getstate. 

1288 """ 

1289 buffer = io.BytesIO(state["reduced"]) 

1290 with LoadHelper(buffer, minimumVersion=3) as loader: 

1291 qgraph = loader.load(state["universe"], graphID=state["graphId"]) 

1292 

1293 self._metadata = qgraph._metadata 

1294 self._buildId = qgraph._buildId 

1295 self._datasetDict = qgraph._datasetDict 

1296 self._nodeIdMap = qgraph._nodeIdMap 

1297 self._count = len(qgraph) 

1298 self._taskToQuantumNode = qgraph._taskToQuantumNode 

1299 self._taskGraph = qgraph._taskGraph 

1300 self._connectedQuanta = qgraph._connectedQuanta 

1301 self._initInputRefs = qgraph._initInputRefs 

1302 self._initOutputRefs = qgraph._initOutputRefs 

1303 

1304 def __eq__(self, other: object) -> bool: 

1305 if not isinstance(other, QuantumGraph): 

1306 return False 

1307 if len(self) != len(other): 

1308 return False 

1309 for node in self: 

1310 if node not in other: 

1311 return False 

1312 if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node): 

1313 return False 

1314 if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node): 

1315 return False 

1316 if set(self.allDatasetTypes) != set(other.allDatasetTypes): 

1317 return False 

1318 return set(self.taskGraph) == set(other.taskGraph)