Coverage for python/lsst/pipe/base/graph/graph.py: 19%

402 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-06 02:28 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QuantumGraph", "IncompatibleGraphError") 

24 

25import io 

26import json 

27import lzma 

28import os 

29import struct 

30import time 

31import uuid 

32from collections import defaultdict, deque 

33from collections.abc import Generator, Iterable, Mapping, MutableMapping 

34from itertools import chain 

35from types import MappingProxyType 

36from typing import Any, BinaryIO, TypeVar 

37 

38import networkx as nx 

39from lsst.daf.butler import DatasetRef, DatasetType, DimensionRecordsAccumulator, DimensionUniverse, Quantum 

40from lsst.resources import ResourcePath, ResourcePathExpression 

41from lsst.utils.introspection import get_full_type_name 

42from networkx.drawing.nx_agraph import write_dot 

43 

44from ..connections import iterConnections 

45from ..pipeline import TaskDef 

46from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner 

47from ._loadHelpers import LoadHelper 

48from ._versionDeserializers import DESERIALIZER_MAP 

49from .quantumNode import BuildId, QuantumNode 

50 

# Type variable for methods that return an instance of their own (sub)class,
# e.g. ``subset`` — keeps subclass round-tripping type-safe.
_T = TypeVar("_T", bound="QuantumGraph")

# modify this constant any time the on disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format for the preamble bytes in a file save
# The base is a big endian encoded unsigned short that is used to hold the
# file format version. This allows reading version bytes and determine which
# loading code should be used for the rest of the file
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream
# Version 2
# A big endian encoded format with an unsigned long long byte stream used to
# indicate the total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# magic bytes that help determine this is a graph save
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"

73 

74 

class IncompatibleGraphError(Exception):
    """Raised when a lookup by NodeId cannot be satisfied because the
    identifier is incompatible with this graph.
    """

81 

82 

83class QuantumGraph: 

84 """QuantumGraph is a directed acyclic graph of `QuantumNode` objects 

85 

86 This data structure represents a concrete workflow generated from a 

87 `Pipeline`. 

88 

89 Parameters 

90 ---------- 

91 quanta : `~collections.abc.Mapping` [ `TaskDef`, \ 

92 `set` [ `~lsst.daf.butler.Quantum` ] ] 

93 This maps tasks (and their configs) to the sets of data they are to 

94 process. 

95 metadata : Optional `~collections.abc.Mapping` of `str` to primitives 

96 This is an optional parameter of extra data to carry with the graph. 

97 Entries in this mapping should be able to be serialized in JSON. 

98 pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional 

99 Set of dataset refs to exclude from a graph. 

100 universe : `~lsst.daf.butler.DimensionUniverse`, optional 

101 The dimensions in which quanta can be defined. Need only be provided if 

102 no quanta have data IDs. 

103 initInputs : `~collections.abc.Mapping`, optional 

104 Maps tasks to their InitInput dataset refs. Dataset refs can be either 

105 resolved or non-resolved. Presently the same dataset refs are included 

106 in each `~lsst.daf.butler.Quantum` for the same task. 

107 initOutputs : `~collections.abc.Mapping`, optional 

108 Maps tasks to their InitOutput dataset refs. Dataset refs can be either 

109 resolved or non-resolved. For intermediate resolved refs their dataset 

110 ID must match ``initInputs`` and Quantum ``initInputs``. 

111 globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional 

112 Dataset refs for some global objects produced by pipeline. These 

113 objects include task configurations and package versions. Typically 

114 they have an empty DataId, but there is no real restriction on what 

115 can appear here. 

116 registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \ 

117 optional 

118 Dataset types which are used by this graph, their definitions must 

119 match registry. If registry does not define dataset type yet, then 

120 it should match one that will be created later. 

121 

122 Raises 

123 ------ 

124 ValueError 

125 Raised if the graph is pruned such that some tasks no longer have nodes 

126 associated with them. 

127 """ 

128 

    def __init__(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ):
        # Parameters are documented on the class docstring.  All construction
        # work is delegated to _buildGraphs so that alternate constructors
        # (``subset``, ``pruneGraphFromRefs``, deserialization) can share it.
        self._buildGraphs(
            quanta,
            metadata=metadata,
            pruneRefs=pruneRefs,
            universe=universe,
            initInputs=initInputs,
            initOutputs=initOutputs,
            globalInitOutputs=globalInitOutputs,
            registryDatasetTypes=registryDatasetTypes,
        )

150 

    def _buildGraphs(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        *,
        _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
        _buildId: BuildId | None = None,
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        """Build the graph that is used to store the relation between tasks,
        and the graph that holds the relations between quanta.

        Parameters
        ----------
        quanta : `~collections.abc.Mapping` [ `TaskDef`, `set` [ `Quantum` ] ]
            Tasks mapped to the quanta they are to process.
        _quantumToNodeId : `~collections.abc.Mapping`, optional
            Private: preserves existing node UUIDs when rebuilding a graph
            (used by `subset` and `pruneGraphFromRefs`).  If supplied, every
            quantum must appear in it; otherwise a `ValueError` is raised.
        _buildId : `BuildId`, optional
            Private: reuses an existing build ID; a new time/pid-based ID is
            generated when omitted.
        metadata, pruneRefs, universe, initInputs, initOutputs, \
        globalInitOutputs, registryDatasetTypes
            See the class docstring.

        Raises
        ------
        RuntimeError
            Raised if quanta have mismatched dimension universes, or if no
            universe can be determined at all.
        ValueError
            Raised if ``_quantumToNodeId`` is missing a quantum, or if
            pruning leaves some task with no nodes.
        """
        self._metadata = metadata
        # Build ID defaults to a "<unix time>-<pid>" string, unique enough to
        # distinguish graph builds on the same host.
        self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
        # Data structures used to identify relations between components;
        # DatasetTypeName -> TaskDef for task,
        # and DatasetRef -> QuantumNode for the quanta
        self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True)
        self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]()

        self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
        self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
        for taskDef, quantumSet in quanta.items():
            connections = taskDef.connections

            # For each type of connection in the task, add a key to the
            # `_DatasetTracker` for the connections name, with a value of
            # the TaskDef in the appropriate field
            for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
                # Have to handle components in inputs: only the parent
                # dataset type name (before the first ".") is tracked.
                dataset_name, _, _ = inpt.name.partition(".")
                self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)

            for output in iterConnections(connections, ("outputs",)):
                # Have to handle possible components in outputs.
                dataset_name, _, _ = output.name.partition(".")
                self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)

            # For each `Quantum` in the set of all `Quantum` for this task,
            # add a key to the `_DatasetTracker` that is a `DatasetRef` for one
            # of the individual datasets inside the `Quantum`, with a value of
            # a newly created QuantumNode to the appropriate input/output
            # field.
            for quantum in quantumSet:
                # All data IDs must share a single dimension universe; the
                # first one seen wins when no universe was passed in.
                if quantum.dataId is not None:
                    if universe is None:
                        universe = quantum.dataId.universe
                    elif universe != quantum.dataId.universe:
                        raise RuntimeError(
                            "Mismatched dimension universes in QuantumGraph construction: "
                            f"{universe} != {quantum.dataId.universe}. "
                        )

                if _quantumToNodeId:
                    if (nodeId := _quantumToNodeId.get(quantum)) is None:
                        raise ValueError(
                            "If _quantuMToNodeNumber is not None, all quanta must have an "
                            "associated value in the mapping"
                        )
                else:
                    nodeId = uuid.uuid4()

                inits = quantum.initInputs.values()
                inputs = quantum.inputs.values()
                value = QuantumNode(quantum, taskDef, nodeId)
                self._taskToQuantumNode[taskDef].add(value)
                self._nodeIdMap[nodeId] = value

                for dsRef in chain(inits, inputs):
                    # unfortunately, `Quantum` allows inits to be individual
                    # `DatasetRef`s or an Iterable of such, so there must
                    # be an instance check here
                    if isinstance(dsRef, Iterable):
                        for sub in dsRef:
                            # Component refs are tracked via their composite
                            # so producer/consumer edges line up.
                            if sub.isComponent():
                                sub = sub.makeCompositeRef()
                            self._datasetRefDict.addConsumer(sub, value)
                    else:
                        assert isinstance(dsRef, DatasetRef)
                        if dsRef.isComponent():
                            dsRef = dsRef.makeCompositeRef()
                        self._datasetRefDict.addConsumer(dsRef, value)
                for dsRef in chain.from_iterable(quantum.outputs.values()):
                    self._datasetRefDict.addProducer(dsRef, value)

        if pruneRefs is not None:
            # NOTE(review): pruneRefs is iterated more than once below; a
            # one-shot generator would yield nothing the second time — verify
            # callers always pass a re-iterable collection.
            # track what refs were pruned and prune the graph
            prunes: set[QuantumNode] = set()
            _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes)

            # recreate the taskToQuantumNode dict removing nodes that have been
            # pruned. Keep track of task defs that now have no QuantumNodes
            emptyTasks: set[str] = set()
            newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set)
            # accumulate all types
            types_ = set()
            # tracker for any pruneRefs that have caused tasks to have no nodes
            # This helps the user find out what caused the issues seen.
            culprits = set()
            # Find all the types from the refs to prune
            for r in pruneRefs:
                types_.add(r.datasetType)

            # For each of the tasks, and their associated nodes, remove any
            # any nodes that were pruned. If there are no nodes associated
            # with a task, record that task, and find out if that was due to
            # a type from an input ref to prune.
            for td, taskNodes in self._taskToQuantumNode.items():
                diff = taskNodes.difference(prunes)
                if len(diff) == 0:
                    if len(taskNodes) != 0:
                        # A representative node is probed: if all of its
                        # inputs of some pruned type were pruned, that type
                        # is reported as a culprit.
                        tp: DatasetType
                        for tp in types_:
                            if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set(
                                tmpRefs
                            ).difference(pruneRefs):
                                culprits.add(tp.name)
                    emptyTasks.add(td.label)
                newTaskToQuantumNode[td] = diff

            # update the internal dict
            self._taskToQuantumNode = newTaskToQuantumNode

            if emptyTasks:
                raise ValueError(
                    f"{', '.join(emptyTasks)} task(s) have no nodes associated with them "
                    f"after graph pruning; {', '.join(culprits)} caused over-pruning"
                )

        # Dimension universe
        if universe is None:
            raise RuntimeError(
                "Dimension universe or at least one quantum with a data ID "
                "must be provided when constructing a QuantumGraph."
            )
        self._universe = universe

        # Graph of quanta relations
        self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph()
        self._count = len(self._connectedQuanta)

        # Graph of task relations, used in various methods
        self._taskGraph = self._datasetDict.makeNetworkXGraph()

        # convert default dict into a regular to prevent accidental key
        # insertion
        self._taskToQuantumNode = dict(self._taskToQuantumNode.items())

        self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._globalInitOutputRefs: list[DatasetRef] = []
        self._registryDatasetTypes: list[DatasetType] = []
        if initInputs is not None:
            self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
        if initOutputs is not None:
            self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
        if globalInitOutputs is not None:
            self._globalInitOutputRefs = list(globalInitOutputs)
        if registryDatasetTypes is not None:
            self._registryDatasetTypes = list(registryDatasetTypes)

316 

317 @property 

318 def taskGraph(self) -> nx.DiGraph: 

319 """A graph representing the relations between the tasks inside 

320 the quantum graph (`networkx.DiGraph`). 

321 """ 

322 return self._taskGraph 

323 

324 @property 

325 def graph(self) -> nx.DiGraph: 

326 """A graph representing the relations between all the `QuantumNode` 

327 objects (`networkx.DiGraph`). 

328 

329 The graph should usually be iterated over, or passed to methods of this 

330 class, but sometimes direct access to the ``networkx`` object may be 

331 helpful. 

332 """ 

333 return self._connectedQuanta 

334 

335 @property 

336 def inputQuanta(self) -> Iterable[QuantumNode]: 

337 """The nodes that are inputs to the graph (iterable [`QuantumNode`]). 

338 

339 These are the nodes that do not depend on any other nodes in the 

340 graph. 

341 """ 

342 return (q for q, n in self._connectedQuanta.in_degree if n == 0) 

343 

344 @property 

345 def outputQuanta(self) -> Iterable[QuantumNode]: 

346 """The nodes that are outputs of the graph (iterable [`QuantumNode`]). 

347 

348 These are the nodes that have no nodes that depend on them in the 

349 graph. 

350 """ 

351 return [q for q, n in self._connectedQuanta.out_degree if n == 0] 

352 

353 @property 

354 def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]: 

355 """All the data set type names that are present in the graph 

356 (`tuple` [`str`]). 

357 

358 These types do not include global init-outputs. 

359 """ 

360 return tuple(self._datasetDict.keys()) 

361 

362 @property 

363 def isConnected(self) -> bool: 

364 """Whether all of the nodes in the graph are connected, ignoring 

365 directionality of connections (`bool`). 

366 """ 

367 return nx.is_weakly_connected(self._connectedQuanta) 

368 

369 def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: 

370 r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s 

371 and nodes which depend on them. 

372 

373 Parameters 

374 ---------- 

375 refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef` 

376 Refs which should be removed from resulting graph 

377 

378 Returns 

379 ------- 

380 graph : `QuantumGraph` 

381 A graph that has been pruned of specified refs and the nodes that 

382 depend on them. 

383 """ 

384 newInst = object.__new__(type(self)) 

385 quantumMap = defaultdict(set) 

386 for node in self: 

387 quantumMap[node.taskDef].add(node.quantum) 

388 

389 # convert to standard dict to prevent accidental key insertion 

390 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) 

391 

392 # This should not change set of tasks in a graph, so we can keep the 

393 # same registryDatasetTypes as in the original graph. 

394 # TODO: Do we need to copy initInputs/initOutputs? 

395 newInst._buildGraphs( 

396 quantumDict, 

397 _quantumToNodeId={n.quantum: n.nodeId for n in self}, 

398 metadata=self._metadata, 

399 pruneRefs=refs, 

400 universe=self._universe, 

401 globalInitOutputs=self._globalInitOutputRefs, 

402 registryDatasetTypes=self._registryDatasetTypes, 

403 ) 

404 return newInst 

405 

406 def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: 

407 """Lookup a `QuantumNode` from an id associated with the node. 

408 

409 Parameters 

410 ---------- 

411 nodeId : `NodeId` 

412 The number associated with a node 

413 

414 Returns 

415 ------- 

416 node : `QuantumNode` 

417 The node corresponding with input number 

418 

419 Raises 

420 ------ 

421 KeyError 

422 Raised if the requested nodeId is not in the graph. 

423 """ 

424 return self._nodeIdMap[nodeId] 

425 

426 def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]: 

427 """Return all the `~lsst.daf.butler.Quantum` associated with a 

428 `TaskDef`. 

429 

430 Parameters 

431 ---------- 

432 taskDef : `TaskDef` 

433 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

434 queried. 

435 

436 Returns 

437 ------- 

438 quanta : `frozenset` of `~lsst.daf.butler.Quantum` 

439 The `set` of `~lsst.daf.butler.Quantum` that is associated with the 

440 specified `TaskDef`. 

441 """ 

442 return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) 

443 

444 def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: 

445 """Return the number of `~lsst.daf.butler.Quantum` associated with 

446 a `TaskDef`. 

447 

448 Parameters 

449 ---------- 

450 taskDef : `TaskDef` 

451 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

452 queried. 

453 

454 Returns 

455 ------- 

456 count : `int` 

457 The number of `~lsst.daf.butler.Quantum` that are associated with 

458 the specified `TaskDef`. 

459 """ 

460 return len(self._taskToQuantumNode.get(taskDef, ())) 

461 

462 def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]: 

463 r"""Return all the `QuantumNode`\s associated with a `TaskDef`. 

464 

465 Parameters 

466 ---------- 

467 taskDef : `TaskDef` 

468 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

469 queried. 

470 

471 Returns 

472 ------- 

473 nodes : `frozenset` [ `QuantumNode` ] 

474 A `frozenset` of `QuantumNode` that is associated with the 

475 specified `TaskDef`. 

476 """ 

477 return frozenset(self._taskToQuantumNode[taskDef]) 

478 

479 def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

480 """Find all tasks that have the specified dataset type name as an 

481 input. 

482 

483 Parameters 

484 ---------- 

485 datasetTypeName : `str` 

486 A string representing the name of a dataset type to be queried, 

487 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

488 `str` for type safety in static type checking. 

489 

490 Returns 

491 ------- 

492 tasks : iterable of `TaskDef` 

493 `TaskDef` objects that have the specified `DatasetTypeName` as an 

494 input, list will be empty if no tasks use specified 

495 `DatasetTypeName` as an input. 

496 

497 Raises 

498 ------ 

499 KeyError 

500 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

501 """ 

502 return (c for c in self._datasetDict.getConsumers(datasetTypeName)) 

503 

504 def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None: 

505 """Find all tasks that have the specified dataset type name as an 

506 output. 

507 

508 Parameters 

509 ---------- 

510 datasetTypeName : `str` 

511 A string representing the name of a dataset type to be queried, 

512 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

513 `str` for type safety in static type checking. 

514 

515 Returns 

516 ------- 

517 result : `TaskDef` or `None` 

518 `TaskDef` that outputs `DatasetTypeName` as an output or `None` if 

519 none of the tasks produce this `DatasetTypeName`. 

520 

521 Raises 

522 ------ 

523 KeyError 

524 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

525 """ 

526 return self._datasetDict.getProducer(datasetTypeName) 

527 

528 def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

529 """Find all tasks that are associated with the specified dataset type 

530 name. 

531 

532 Parameters 

533 ---------- 

534 datasetTypeName : `str` 

535 A string representing the name of a dataset type to be queried, 

536 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

537 `str` for type safety in static type checking. 

538 

539 Returns 

540 ------- 

541 result : iterable of `TaskDef` 

542 `TaskDef` objects that are associated with the specified 

543 `DatasetTypeName`. 

544 

545 Raises 

546 ------ 

547 KeyError 

548 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

549 """ 

550 return self._datasetDict.getAll(datasetTypeName) 

551 

552 def findTaskDefByName(self, taskName: str) -> list[TaskDef]: 

553 """Determine which `TaskDef` objects in this graph are associated 

554 with a `str` representing a task name (looks at the ``taskName`` 

555 property of `TaskDef` objects). 

556 

557 Returns a list of `TaskDef` objects as a `PipelineTask` may appear 

558 multiple times in a graph with different labels. 

559 

560 Parameters 

561 ---------- 

562 taskName : `str` 

563 Name of a task to search for. 

564 

565 Returns 

566 ------- 

567 result : `list` of `TaskDef` 

568 List of the `TaskDef` objects that have the name specified. 

569 Multiple values are returned in the case that a task is used 

570 multiple times with different labels. 

571 """ 

572 results = [] 

573 for task in self._taskToQuantumNode: 

574 split = task.taskName.split(".") 

575 if split[-1] == taskName: 

576 results.append(task) 

577 return results 

578 

579 def findTaskDefByLabel(self, label: str) -> TaskDef | None: 

580 """Determine which `TaskDef` objects in this graph are associated 

581 with a `str` representing a tasks label. 

582 

583 Parameters 

584 ---------- 

585 taskName : `str` 

586 Name of a task to search for 

587 

588 Returns 

589 ------- 

590 result : `TaskDef` 

591 `TaskDef` objects that has the specified label. 

592 """ 

593 for task in self._taskToQuantumNode: 

594 if label == task.label: 

595 return task 

596 return None 

597 

598 def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]: 

599 r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified 

600 `DatasetTypeName`. 

601 

602 Parameters 

603 ---------- 

604 datasetTypeName : `str` 

605 The name of the dataset type to search for as a string, 

606 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

607 `str` for type safety in static type checking. 

608 

609 Returns 

610 ------- 

611 result : `set` of `QuantumNode` objects 

612 A `set` of `QuantumNode`\s that contain specified 

613 `DatasetTypeName`. 

614 

615 Raises 

616 ------ 

617 KeyError 

618 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

619 

620 """ 

621 tasks = self._datasetDict.getAll(datasetTypeName) 

622 result: set[Quantum] = set() 

623 result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task)) 

624 return result 

625 

626 def checkQuantumInGraph(self, quantum: Quantum) -> bool: 

627 """Check if specified quantum appears in the graph as part of a node. 

628 

629 Parameters 

630 ---------- 

631 quantum : `lsst.daf.butler.Quantum` 

632 The quantum to search for. 

633 

634 Returns 

635 ------- 

636 in_graph : `bool` 

637 The result of searching for the quantum. 

638 """ 

639 return any(quantum == node.quantum for node in self) 

640 

641 def writeDotGraph(self, output: str | io.BufferedIOBase) -> None: 

642 """Write out the graph as a dot graph. 

643 

644 Parameters 

645 ---------- 

646 output : `str` or `io.BufferedIOBase` 

647 Either a filesystem path to write to, or a file handle object. 

648 """ 

649 write_dot(self._connectedQuanta, output) 

650 

651 def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T: 

652 """Create a new graph object that contains the subset of the nodes 

653 specified as input. Node number is preserved. 

654 

655 Parameters 

656 ---------- 

657 nodes : `QuantumNode` or iterable of `QuantumNode` 

658 Nodes from which to create subset. 

659 

660 Returns 

661 ------- 

662 graph : instance of graph type 

663 An instance of the type from which the subset was created. 

664 """ 

665 if not isinstance(nodes, Iterable): 

666 nodes = (nodes,) 

667 quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes 

668 quantumMap = defaultdict(set) 

669 

670 dataset_type_names: set[str] = set() 

671 node: QuantumNode 

672 for node in quantumSubgraph: 

673 quantumMap[node.taskDef].add(node.quantum) 

674 dataset_type_names.update( 

675 dstype.name 

676 for dstype in chain( 

677 node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys() 

678 ) 

679 ) 

680 

681 # May need to trim dataset types from registryDatasetTypes. 

682 for taskDef in quantumMap: 

683 if refs := self.initOutputRefs(taskDef): 

684 dataset_type_names.update(ref.datasetType.name for ref in refs) 

685 dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs) 

686 registryDatasetTypes = [ 

687 dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names 

688 ] 

689 

690 # convert to standard dict to prevent accidental key insertion 

691 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) 

692 # Create an empty graph, and then populate it with custom mapping 

693 newInst = type(self)({}, universe=self._universe) 

694 # TODO: Do we need to copy initInputs/initOutputs? 

695 newInst._buildGraphs( 

696 quantumDict, 

697 _quantumToNodeId={n.quantum: n.nodeId for n in nodes}, 

698 _buildId=self._buildId, 

699 metadata=self._metadata, 

700 universe=self._universe, 

701 globalInitOutputs=self._globalInitOutputRefs, 

702 registryDatasetTypes=registryDatasetTypes, 

703 ) 

704 return newInst 

705 

706 def subsetToConnected(self: _T) -> tuple[_T, ...]: 

707 """Generate a list of subgraphs where each is connected. 

708 

709 Returns 

710 ------- 

711 result : `list` of `QuantumGraph` 

712 A list of graphs that are each connected. 

713 """ 

714 return tuple( 

715 self.subset(connectedSet) 

716 for connectedSet in nx.weakly_connected_components(self._connectedQuanta) 

717 ) 

718 

719 def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

720 """Return a set of `QuantumNode` that are direct inputs to a specified 

721 node. 

722 

723 Parameters 

724 ---------- 

725 node : `QuantumNode` 

726 The node of the graph for which inputs are to be determined. 

727 

728 Returns 

729 ------- 

730 inputs : `set` of `QuantumNode` 

731 All the nodes that are direct inputs to specified node. 

732 """ 

733 return set(self._connectedQuanta.predecessors(node)) 

734 

735 def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

736 """Return a set of `QuantumNode` that are direct outputs of a specified 

737 node. 

738 

739 Parameters 

740 ---------- 

741 node : `QuantumNode` 

742 The node of the graph for which outputs are to be determined. 

743 

744 Returns 

745 ------- 

746 outputs : `set` of `QuantumNode` 

747 All the nodes that are direct outputs to specified node. 

748 """ 

749 return set(self._connectedQuanta.successors(node)) 

750 

751 def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

752 """Return a graph of `QuantumNode` that are direct inputs and outputs 

753 of a specified node. 

754 

755 Parameters 

756 ---------- 

757 node : `QuantumNode` 

758 The node of the graph for which connected nodes are to be 

759 determined. 

760 

761 Returns 

762 ------- 

763 graph : graph of `QuantumNode` 

764 All the nodes that are directly connected to specified node. 

765 """ 

766 nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node)) 

767 nodes.add(node) 

768 return self.subset(nodes) 

769 

770 def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

771 """Return a graph of the specified node and all the ancestor nodes 

772 directly reachable by walking edges. 

773 

774 Parameters 

775 ---------- 

776 node : `QuantumNode` 

777 The node for which all ancestors are to be determined 

778 

779 Returns 

780 ------- 

781 ancestors : graph of `QuantumNode` 

782 Graph of node and all of its ancestors. 

783 """ 

784 predecessorNodes = nx.ancestors(self._connectedQuanta, node) 

785 predecessorNodes.add(node) 

786 return self.subset(predecessorNodes) 

787 

788 def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]: 

789 """Check a graph for the presense of cycles and returns the edges of 

790 any cycles found, or an empty list if there is no cycle. 

791 

792 Returns 

793 ------- 

794 result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ] 

795 A list of any graph edges that form a cycle, or an empty list if 

796 there is no cycle. Empty list to so support if graph.find_cycle() 

797 syntax as an empty list is falsy. 

798 """ 

799 try: 

800 return nx.find_cycle(self._connectedQuanta) 

801 except nx.NetworkXNoCycle: 

802 return [] 

803 

804 def saveUri(self, uri: ResourcePathExpression) -> None: 

805 """Save `QuantumGraph` to the specified URI. 

806 

807 Parameters 

808 ---------- 

809 uri : convertible to `~lsst.resources.ResourcePath` 

810 URI to where the graph should be saved. 

811 """ 

812 buffer = self._buildSaveObject() 

813 path = ResourcePath(uri) 

814 if path.getExtension() not in (".qgraph"): 

815 raise TypeError(f"Can currently only save a graph in qgraph format not {uri}") 

816 path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

817 

818 @property 

819 def metadata(self) -> MappingProxyType[str, Any] | None: 

820 """Extra data carried with the graph (mapping [`str`] or `None`). 

821 

822 The mapping is a dynamic view of this object's metadata. Values should 

823 be able to be serialized in JSON. 

824 """ 

825 if self._metadata is None: 

826 return None 

827 return MappingProxyType(self._metadata) 

828 

829 def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: 

830 """Return DatasetRefs for a given task InitInputs. 

831 

832 Parameters 

833 ---------- 

834 taskDef : `TaskDef` 

835 Task definition structure. 

836 

837 Returns 

838 ------- 

839 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` 

840 DatasetRef for the task InitInput, can be `None`. This can return 

841 either resolved or non-resolved reference. 

842 """ 

843 return self._initInputRefs.get(taskDef) 

844 

845 def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: 

846 """Return DatasetRefs for a given task InitOutputs. 

847 

848 Parameters 

849 ---------- 

850 taskDef : `TaskDef` 

851 Task definition structure. 

852 

853 Returns 

854 ------- 

855 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` 

856 DatasetRefs for the task InitOutput, can be `None`. This can return 

857 either resolved or non-resolved reference. Resolved reference will 

858 match Quantum's initInputs if this is an intermediate dataset type. 

859 """ 

860 return self._initOutputRefs.get(taskDef) 

861 

862 def globalInitOutputRefs(self) -> list[DatasetRef]: 

863 """Return DatasetRefs for global InitOutputs. 

864 

865 Returns 

866 ------- 

867 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] 

868 DatasetRefs for global InitOutputs. 

869 """ 

870 return self._globalInitOutputRefs 

871 

872 def registryDatasetTypes(self) -> list[DatasetType]: 

873 """Return dataset types used by this graph, their definitions match 

874 dataset types from registry. 

875 

876 Returns 

877 ------- 

878 refs : `list` [ `~lsst.daf.butler.DatasetType` ] 

879 Dataset types for this graph. 

880 """ 

881 return self._registryDatasetTypes 

882 

883 @classmethod 

884 def loadUri( 

885 cls, 

886 uri: ResourcePathExpression, 

887 universe: DimensionUniverse | None = None, 

888 nodes: Iterable[uuid.UUID] | None = None, 

889 graphID: BuildId | None = None, 

890 minimumVersion: int = 3, 

891 ) -> QuantumGraph: 

892 """Read `QuantumGraph` from a URI. 

893 

894 Parameters 

895 ---------- 

896 uri : convertible to `~lsst.resources.ResourcePath` 

897 URI from where to load the graph. 

898 universe : `~lsst.daf.butler.DimensionUniverse`, optional 

899 If `None` it is loaded from the `QuantumGraph` 

900 saved structure. If supplied, the 

901 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` 

902 will be validated against the supplied argument for compatibility. 

903 nodes : iterable of `uuid.UUID` or `None` 

904 UUIDs that correspond to nodes in the graph. If specified, only 

905 these nodes will be loaded. Defaults to None, in which case all 

906 nodes will be loaded. 

907 graphID : `str` or `None` 

908 If specified this ID is verified against the loaded graph prior to 

909 loading any Nodes. This defaults to None in which case no 

910 validation is done. 

911 minimumVersion : `int` 

912 Minimum version of a save file to load. Set to -1 to load all 

913 versions. Older versions may need to be loaded, and re-saved 

914 to upgrade them to the latest format before they can be used in 

915 production. 

916 

917 Returns 

918 ------- 

919 graph : `QuantumGraph` 

920 Resulting QuantumGraph instance. 

921 

922 Raises 

923 ------ 

924 TypeError 

925 Raised if file contains instance of a type other than 

926 `QuantumGraph`. 

927 ValueError 

928 Raised if one or more of the nodes requested is not in the 

929 `QuantumGraph` or if graphID parameter does not match the graph 

930 being loaded or if the supplied uri does not point at a valid 

931 `QuantumGraph` save file. 

932 RuntimeError 

933 Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not 

934 compatible with the `~lsst.daf.butler.DimensionUniverse` saved in 

935 the graph. 

936 """ 

937 uri = ResourcePath(uri) 

938 if uri.getExtension() in {".qgraph"}: 

939 with LoadHelper(uri, minimumVersion) as loader: 

940 qgraph = loader.load(universe, nodes, graphID) 

941 else: 

942 raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}") 

943 if not isinstance(qgraph, QuantumGraph): 

944 raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}") 

945 return qgraph 

946 

947 @classmethod 

948 def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None: 

949 """Read the header of a `QuantumGraph` pointed to by the uri parameter 

950 and return it as a string. 

951 

952 Parameters 

953 ---------- 

954 uri : convertible to `~lsst.resources.ResourcePath` 

955 The location of the `QuantumGraph` to load. If the argument is a 

956 string, it must correspond to a valid 

957 `~lsst.resources.ResourcePath` path. 

958 minimumVersion : `int` 

959 Minimum version of a save file to load. Set to -1 to load all 

960 versions. Older versions may need to be loaded, and re-saved 

961 to upgrade them to the latest format before they can be used in 

962 production. 

963 

964 Returns 

965 ------- 

966 header : `str` or `None` 

967 The header associated with the specified `QuantumGraph` it there is 

968 one, else `None`. 

969 

970 Raises 

971 ------ 

972 ValueError 

973 Raised if the extension of the file specified by uri is not a 

974 `QuantumGraph` extension. 

975 """ 

976 uri = ResourcePath(uri) 

977 if uri.getExtension() in {".qgraph"}: 

978 return LoadHelper(uri, minimumVersion).readHeader() 

979 else: 

980 raise ValueError("Only know how to handle files saved as `.qgraph`") 

981 

982 def buildAndPrintHeader(self) -> None: 

983 """Create a header that would be used in a save of this object and 

984 prints it out to standard out. 

985 """ 

986 _, header = self._buildSaveObject(returnHeader=True) 

987 print(json.dumps(header)) 

988 

989 def save(self, file: BinaryIO) -> None: 

990 """Save QuantumGraph to a file. 

991 

992 Parameters 

993 ---------- 

994 file : `io.BufferedIOBase` 

995 File to write data open in binary mode. 

996 """ 

997 buffer = self._buildSaveObject() 

998 file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

999 

    def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
        """Serialize this graph into the on-disk ``.qgraph`` byte layout.

        The layout written here is: magic bytes, save-format version,
        header length, lzma-compressed JSON header, then one lzma-compressed
        JSON payload per task definition followed by one per quantum node.
        The header records the byte ranges of every payload so loaders can
        seek to individual tasks/nodes without reading everything.

        Parameters
        ----------
        returnHeader : `bool`, optional
            If `True`, also return the header mapping that was embedded in
            the byte stream.

        Returns
        -------
        buffer : `bytearray`
            The complete serialized graph.
        headerData : `dict`
            The header mapping; only returned when ``returnHeader`` is
            `True`.
        """
        # make some containers
        jsonData: deque[bytes] = deque()
        # node map is a list because json does not accept mapping keys that
        # are not strings, so we store a list of key, value pairs that will
        # be converted to a mapping on load
        nodeMap = []
        taskDefMap = {}
        headerData: dict[str, Any] = {}

        # Store the QuantumGraph BuildId, this will allow validating BuildIds
        # at load time, prior to loading any QuantumNodes. Name chosen for
        # unlikely conflicts.
        headerData["GraphBuildID"] = self.graphID
        headerData["Metadata"] = self._metadata

        # Store the universe this graph was created with
        universeConfig = self._universe.dimensionConfig
        headerData["universe"] = universeConfig.toDict()

        # counter for the number of bytes processed thus far; every payload
        # appended below records its (start, end) offsets relative to the
        # end of the header, so this running total must stay exact.
        count = 0
        # serialize out the task Defs recording the start and end bytes of
        # each taskDef
        inverseLookup = self._datasetDict.inverse
        taskDef: TaskDef
        # sort by task label to ensure serialization happens in the same order
        for taskDef in self.taskGraph:
            # compressing has very little impact on saving or load time, but
            # a large impact on on disk size, so it is worth doing
            taskDescription: dict[str, Any] = {}
            # save the fully qualified name.
            taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
            # save the config as a text stream that will be un-persisted on
            # the other end
            stream = io.StringIO()
            taskDef.config.saveToStream(stream)
            taskDescription["config"] = stream.getvalue()
            taskDescription["label"] = taskDef.label
            if (refs := self._initInputRefs.get(taskDef)) is not None:
                taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
            if (refs := self._initOutputRefs.get(taskDef)) is not None:
                taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

            inputs = []
            outputs = []

            # Determine the connection between all of tasks and save that in
            # the header as a list of connections and edges in each task
            # this will help in un-persisting, and possibly in a "quick view"
            # method that does not require everything to be un-persisted
            #
            # Typing returns can't be parameter dependent
            for connection in inverseLookup[taskDef]:  # type: ignore
                consumers = self._datasetDict.getConsumers(connection)
                producer = self._datasetDict.getProducer(connection)
                if taskDef in consumers:
                    # This checks if the task consumes the connection directly
                    # from the datastore or it is produced by another task
                    producerLabel = producer.label if producer is not None else "datastore"
                    inputs.append((producerLabel, connection))
                elif taskDef not in consumers and producer is taskDef:
                    # If there are no consumers for this tasks produced
                    # connection, the output will be said to be the datastore
                    # in which case the for loop will be a zero length loop
                    if not consumers:
                        outputs.append(("datastore", connection))
                    for td in consumers:
                        outputs.append((td.label, connection))

            # dump to json string, and encode that string to bytes and then
            # compress those bytes
            dump = lzma.compress(json.dumps(taskDescription).encode())
            # record the sizing and relation information
            taskDefMap[taskDef.label] = {
                "bytes": (count, count + len(dump)),
                "inputs": inputs,
                "outputs": outputs,
            }
            count += len(dump)
            jsonData.append(dump)

        headerData["TaskDefs"] = taskDefMap

        # serialize the nodes, recording the start and end bytes of each node
        dimAccumulator = DimensionRecordsAccumulator()
        for node in self:
            # compressing has very little impact on saving or load time, but
            # a large impact on on disk size, so it is worth doing
            simpleNode = node.to_simple(accumulator=dimAccumulator)

            dump = lzma.compress(simpleNode.json().encode())
            jsonData.append(dump)
            nodeMap.append(
                (
                    str(node.nodeId),
                    {
                        "bytes": (count, count + len(dump)),
                        "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                        "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                    },
                )
            )
            count += len(dump)

        # Dimension records are accumulated across all nodes and stored once
        # in the header, keyed for reuse at load time.
        headerData["DimensionRecords"] = {
            key: value.model_dump()
            for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
        }

        # need to serialize this as a series of key,value tuples because of
        # a limitation on how json cant do anything but strings as keys
        headerData["Nodes"] = nodeMap

        if self._globalInitOutputRefs:
            headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

        if self._registryDatasetTypes:
            headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

        # dump the headerData to json
        header_encode = lzma.compress(json.dumps(headerData).encode())

        # record the sizes as 2 unsigned long long numbers for a total of 16
        # bytes
        save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

        fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
        map_lengths = struct.pack(fmt_string, len(header_encode))

        # write each component of the save out in a deterministic order
        buffer = bytearray()
        buffer.extend(MAGIC_BYTES)
        buffer.extend(save_bytes)
        buffer.extend(map_lengths)
        buffer.extend(header_encode)
        # Iterate over the length of jsonData, and for each element pop the
        # leftmost element off the deque and write it out. This is to save
        # memory, as the memory is added to the buffer object, it is removed
        # from the container.
        #
        # Only this section needs to worry about memory pressure because
        # everything else written to the buffer prior to this data is
        # only on the order of kilobytes to low numbers of megabytes.
        while jsonData:
            buffer.extend(jsonData.popleft())
        if returnHeader:
            return buffer, headerData
        else:
            return buffer

1150 

1151 @classmethod 

1152 def load( 

1153 cls, 

1154 file: BinaryIO, 

1155 universe: DimensionUniverse | None = None, 

1156 nodes: Iterable[uuid.UUID] | None = None, 

1157 graphID: BuildId | None = None, 

1158 minimumVersion: int = 3, 

1159 ) -> QuantumGraph: 

1160 """Read `QuantumGraph` from a file that was made by `save`. 

1161 

1162 Parameters 

1163 ---------- 

1164 file : `io.IO` of bytes 

1165 File with data open in binary mode. 

1166 universe : `~lsst.daf.butler.DimensionUniverse`, optional 

1167 If `None` it is loaded from the `QuantumGraph` 

1168 saved structure. If supplied, the 

1169 `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` 

1170 will be validated against the supplied argument for compatibility. 

1171 nodes : iterable of `uuid.UUID` or `None` 

1172 UUIDs that correspond to nodes in the graph. If specified, only 

1173 these nodes will be loaded. Defaults to None, in which case all 

1174 nodes will be loaded. 

1175 graphID : `str` or `None` 

1176 If specified this ID is verified against the loaded graph prior to 

1177 loading any Nodes. This defaults to None in which case no 

1178 validation is done. 

1179 minimumVersion : `int` 

1180 Minimum version of a save file to load. Set to -1 to load all 

1181 versions. Older versions may need to be loaded, and re-saved 

1182 to upgrade them to the latest format before they can be used in 

1183 production. 

1184 

1185 Returns 

1186 ------- 

1187 graph : `QuantumGraph` 

1188 Resulting QuantumGraph instance. 

1189 

1190 Raises 

1191 ------ 

1192 TypeError 

1193 Raised if data contains instance of a type other than 

1194 `QuantumGraph`. 

1195 ValueError 

1196 Raised if one or more of the nodes requested is not in the 

1197 `QuantumGraph` or if graphID parameter does not match the graph 

1198 being loaded or if the supplied uri does not point at a valid 

1199 `QuantumGraph` save file. 

1200 """ 

1201 with LoadHelper(file, minimumVersion) as loader: 

1202 qgraph = loader.load(universe, nodes, graphID) 

1203 if not isinstance(qgraph, QuantumGraph): 

1204 raise TypeError(f"QuantumGraph file contains unexpected object type: {type(qgraph)}") 

1205 return qgraph 

1206 

1207 def iterTaskGraph(self) -> Generator[TaskDef, None, None]: 

1208 """Iterate over the `taskGraph` attribute in topological order 

1209 

1210 Yields 

1211 ------ 

1212 taskDef : `TaskDef` 

1213 `TaskDef` objects in topological order 

1214 """ 

1215 yield from nx.topological_sort(self.taskGraph) 

1216 

1217 def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None: 

1218 """Change output run and dataset ID for each output dataset. 

1219 

1220 Parameters 

1221 ---------- 

1222 run : `str` 

1223 New output run name. 

1224 metadata_key : `str` or `None` 

1225 Specifies matadata key corresponding to output run name to update 

1226 with new run name. If `None` or if metadata is missing it is not 

1227 updated. If metadata is present but key is missing, it will be 

1228 added. 

1229 update_graph_id : `bool`, optional 

1230 If `True` then also update graph ID with a new unique value. 

1231 """ 

1232 

1233 def _update_refs_in_place(refs: list[DatasetRef], run: str) -> None: 

1234 """Update list of `~lsst.daf.butler.DatasetRef` with new run and 

1235 dataset IDs. 

1236 """ 

1237 for ref in refs: 

1238 # hack the run to be replaced explicitly 

1239 object.__setattr__(ref, "run", run) 

1240 

1241 # Loop through all outputs and update their datasets. 

1242 for node in self._connectedQuanta: 

1243 for refs in node.quantum.outputs.values(): 

1244 _update_refs_in_place(refs, run) 

1245 

1246 for refs in self._initOutputRefs.values(): 

1247 _update_refs_in_place(refs, run) 

1248 

1249 _update_refs_in_place(self._globalInitOutputRefs, run) 

1250 

1251 # Update all intermediates from their matching outputs. 

1252 for node in self._connectedQuanta: 

1253 for refs in node.quantum.inputs.values(): 

1254 _update_refs_in_place(refs, run) 

1255 

1256 for refs in self._initInputRefs.values(): 

1257 _update_refs_in_place(refs, run) 

1258 

1259 if update_graph_id: 

1260 self._buildId = BuildId(f"{time.time()}-{os.getpid()}") 

1261 

1262 # Update metadata if present. 

1263 if self._metadata is not None and metadata_key is not None: 

1264 metadata = dict(self._metadata) 

1265 metadata[metadata_key] = run 

1266 self._metadata = metadata 

1267 

1268 @property 

1269 def graphID(self) -> BuildId: 

1270 """The ID generated by the graph at construction time (`str`).""" 

1271 return self._buildId 

1272 

1273 @property 

1274 def universe(self) -> DimensionUniverse: 

1275 """Dimension universe associated with this graph 

1276 (`~lsst.daf.butler.DimensionUniverse`). 

1277 """ 

1278 return self._universe 

1279 

1280 def __iter__(self) -> Generator[QuantumNode, None, None]: 

1281 yield from nx.topological_sort(self._connectedQuanta) 

1282 

1283 def __len__(self) -> int: 

1284 return self._count 

1285 

1286 def __contains__(self, node: QuantumNode) -> bool: 

1287 return self._connectedQuanta.has_node(node) 

1288 

1289 def __getstate__(self) -> dict: 

1290 """Store a compact form of the graph as a list of graph nodes, and a 

1291 tuple of task labels and task configs. The full graph can be 

1292 reconstructed with this information, and it preserves the ordering of 

1293 the graph nodes. 

1294 """ 

1295 universe: DimensionUniverse | None = None 

1296 for node in self: 

1297 dId = node.quantum.dataId 

1298 if dId is None: 

1299 continue 

1300 universe = dId.graph.universe 

1301 return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe} 

1302 

1303 def __setstate__(self, state: dict) -> None: 

1304 """Reconstructs the state of the graph from the information persisted 

1305 in getstate. 

1306 """ 

1307 buffer = io.BytesIO(state["reduced"]) 

1308 with LoadHelper(buffer, minimumVersion=3) as loader: 

1309 qgraph = loader.load(state["universe"], graphID=state["graphId"]) 

1310 

1311 self._metadata = qgraph._metadata 

1312 self._buildId = qgraph._buildId 

1313 self._datasetDict = qgraph._datasetDict 

1314 self._nodeIdMap = qgraph._nodeIdMap 

1315 self._count = len(qgraph) 

1316 self._taskToQuantumNode = qgraph._taskToQuantumNode 

1317 self._taskGraph = qgraph._taskGraph 

1318 self._connectedQuanta = qgraph._connectedQuanta 

1319 self._initInputRefs = qgraph._initInputRefs 

1320 self._initOutputRefs = qgraph._initOutputRefs 

1321 

1322 def __eq__(self, other: object) -> bool: 

1323 if not isinstance(other, QuantumGraph): 

1324 return False 

1325 if len(self) != len(other): 

1326 return False 

1327 for node in self: 

1328 if node not in other: 

1329 return False 

1330 if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node): 

1331 return False 

1332 if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node): 

1333 return False 

1334 if set(self.allDatasetTypes) != set(other.allDatasetTypes): 

1335 return False 

1336 return set(self.taskGraph) == set(other.taskGraph)