Coverage for python/lsst/pipe/base/graph/graph.py: 15%

429 statements  

coverage.py v7.2.5, created at 2023-05-04 09:31 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QuantumGraph", "IncompatibleGraphError") 

24 

25import io 

26import json 

27import lzma 

28import os 

29import pickle 

30import struct 

31import time 

32import uuid 

33import warnings 

34from collections import defaultdict, deque 

35from itertools import chain 

36from types import MappingProxyType 

37from typing import ( 

38 Any, 

39 BinaryIO, 

40 DefaultDict, 

41 Deque, 

42 Dict, 

43 FrozenSet, 

44 Generator, 

45 Iterable, 

46 List, 

47 Mapping, 

48 MutableMapping, 

49 Optional, 

50 Set, 

51 Tuple, 

52 TypeVar, 

53 Union, 

54) 

55 

56import networkx as nx 

57from lsst.daf.butler import DatasetRef, DatasetType, DimensionRecordsAccumulator, DimensionUniverse, Quantum 

58from lsst.resources import ResourcePath, ResourcePathExpression 

59from lsst.utils.introspection import get_full_type_name 

60from networkx.drawing.nx_agraph import write_dot 

61 

62from ..connections import iterConnections 

63from ..pipeline import TaskDef 

64from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner 

65from ._loadHelpers import LoadHelper 

66from ._versionDeserializers import DESERIALIZER_MAP 

67from .quantumNode import BuildId, QuantumNode 

68 

69_T = TypeVar("_T", bound="QuantumGraph") 

70 

71# modify this constant any time the on disk representation of the save file 

72# changes, and update the load helpers to behave properly for each version. 

73SAVE_VERSION = 3 

74 

75# Strings used to describe the format for the preamble bytes in a file save 

76# The base is a big endian encoded unsigned short that is used to hold the 

77# file format version. This allows reading the version bytes and determining

78# which loading code should be used for the rest of the file.

79STRUCT_FMT_BASE = ">H" 

80# 

81# Version 1 

82# This marks a big endian encoded format with an unsigned short, an unsigned 

83# long long, and an unsigned long long in the byte stream 

84# Version 2 

85# A big endian encoded format with an unsigned long long byte stream used to 

86# indicate the total length of the entire header. 

87STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"} 

88 

89# magic bytes that help determine this is a graph save 

90MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9" 

91 
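An illustrative sketch (not part of this module) of how a reader could consume the preamble described above, assuming ``fd`` is a binary file handle positioned at the start of a ``.qgraph`` save:

    import struct

    def peek_save_version(fd):
        # The save begins with the magic bytes, followed by a big endian
        # unsigned short holding the save file version.
        if fd.read(len(MAGIC_BYTES)) != MAGIC_BYTES:
            raise ValueError("not a QuantumGraph save file")
        (version,) = struct.unpack(
            STRUCT_FMT_BASE, fd.read(struct.calcsize(STRUCT_FMT_BASE))
        )
        return version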

92 

93class IncompatibleGraphError(Exception): 

94 """Exception class to indicate that a lookup by NodeId is impossible due 

95 to incompatibilities 

96 """ 

97 

98 pass 

99 

100 

101class QuantumGraph: 

102 """QuantumGraph is a directed acyclic graph of `QuantumNode` objects 

103 

104 This data structure represents a concrete workflow generated from a 

105 `Pipeline`. 

106 

107 Parameters 

108 ---------- 

109 quanta : Mapping of `TaskDef` to sets of `Quantum` 

110 This maps tasks (and their configs) to the sets of data they are to 

111 process. 

112 metadata : Optional Mapping of `str` to primitives 

113 This is an optional parameter of extra data to carry with the graph. 

114 Entries in this mapping should be able to be serialized in JSON. 

115 pruneRefs : iterable [ `DatasetRef` ], optional 

116 Set of dataset refs to exclude from a graph. 

117 universe : `lsst.daf.butler.DimensionUniverse`, optional 

118 The dimensions in which quanta can be defined. Need only be provided if 

119 no quanta have data IDs. 

120 initInputs : `Mapping`, optional 

121 Maps tasks to their InitInput dataset refs. Dataset refs can be either 

122 resolved or non-resolved. Presently the same dataset refs are included 

123 in each `Quantum` for the same task. 

124 initOutputs : `Mapping`, optional 

125 Maps tasks to their InitOutput dataset refs. Dataset refs can be either 

126 resolved or non-resolved. For intermediate resolved refs their dataset 

127 ID must match ``initInputs`` and Quantum ``initInputs``. 

128 globalInitOutputs : iterable [ `DatasetRef` ], optional 

129 Dataset refs for some global objects produced by pipeline. These 

130 objects include task configurations and package versions. Typically 

131 they have an empty DataId, but there is no real restriction on what 

132 can appear here. 

133 registryDatasetTypes : iterable [ `DatasetType` ], optional 

134 Dataset types which are used by this graph; their definitions must

135 match the registry. If the registry does not define a dataset type

136 yet, it should match one that will be created later.

137 

138 Raises 

139 ------ 

140 ValueError 

141 Raised if the graph is pruned such that some tasks no longer have nodes 

142 associated with them. 

143 """ 

144 

145 def __init__( 

146 self, 

147 quanta: Mapping[TaskDef, Set[Quantum]], 

148 metadata: Optional[Mapping[str, Any]] = None, 

149 pruneRefs: Optional[Iterable[DatasetRef]] = None, 

150 universe: Optional[DimensionUniverse] = None, 

151 initInputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, 

152 initOutputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, 

153 globalInitOutputs: Optional[Iterable[DatasetRef]] = None, 

154 registryDatasetTypes: Optional[Iterable[DatasetType]] = None, 

155 ): 

156 self._buildGraphs( 

157 quanta, 

158 metadata=metadata, 

159 pruneRefs=pruneRefs, 

160 universe=universe, 

161 initInputs=initInputs, 

162 initOutputs=initOutputs, 

163 globalInitOutputs=globalInitOutputs, 

164 registryDatasetTypes=registryDatasetTypes, 

165 ) 

166 

167 def _buildGraphs( 

168 self, 

169 quanta: Mapping[TaskDef, Set[Quantum]], 

170 *, 

171 _quantumToNodeId: Optional[Mapping[Quantum, uuid.UUID]] = None, 

172 _buildId: Optional[BuildId] = None, 

173 metadata: Optional[Mapping[str, Any]] = None, 

174 pruneRefs: Optional[Iterable[DatasetRef]] = None, 

175 universe: Optional[DimensionUniverse] = None, 

176 initInputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, 

177 initOutputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, 

178 globalInitOutputs: Optional[Iterable[DatasetRef]] = None, 

179 registryDatasetTypes: Optional[Iterable[DatasetType]] = None, 

180 ) -> None: 

181 """Builds the graph that is used to store the relation between tasks, 

182 and the graph that holds the relations between quanta 

183 """ 

184 self._metadata = metadata 

185 self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}") 

186 # Data structures used to identify relations between components; 

187 # DatasetTypeName -> TaskDef for task, 

188 # and DatasetRef -> QuantumNode for the quanta 

189 self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True) 

190 self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]() 

191 

192 self._nodeIdMap: Dict[uuid.UUID, QuantumNode] = {} 

193 self._taskToQuantumNode: MutableMapping[TaskDef, Set[QuantumNode]] = defaultdict(set) 

194 for taskDef, quantumSet in quanta.items(): 

195 connections = taskDef.connections 

196 

197 # For each type of connection in the task, add a key to the 

198 # `_DatasetTracker` for the connections name, with a value of 

199 # the TaskDef in the appropriate field 

200 for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")): 

201 # Have to handle components in inputs. 

202 dataset_name, _, _ = inpt.name.partition(".") 

203 self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef) 

204 

205 for output in iterConnections(connections, ("outputs",)): 

206 # Have to handle possible components in outputs. 

207 dataset_name, _, _ = output.name.partition(".") 

208 self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef) 

209 

210 # For each `Quantum` in the set of all `Quantum` for this task, 

211 # add a key to the `_DatasetTracker` that is a `DatasetRef` for one 

212 # of the individual datasets inside the `Quantum`, with a value of 

213 # a newly created QuantumNode to the appropriate input/output 

214 # field. 

215 for quantum in quantumSet: 

216 if quantum.dataId is not None: 

217 if universe is None: 

218 universe = quantum.dataId.universe 

219 elif universe != quantum.dataId.universe: 

220 raise RuntimeError( 

221 "Mismatched dimension universes in QuantumGraph construction: " 

222 f"{universe} != {quantum.dataId.universe}. " 

223 ) 

224 

225 if _quantumToNodeId: 

226 if (nodeId := _quantumToNodeId.get(quantum)) is None: 

227 raise ValueError( 

228 "If _quantuMToNodeNumber is not None, all quanta must have an " 

229 "associated value in the mapping" 

230 ) 

231 else: 

232 nodeId = uuid.uuid4() 

233 

234 inits = quantum.initInputs.values() 

235 inputs = quantum.inputs.values() 

236 value = QuantumNode(quantum, taskDef, nodeId) 

237 self._taskToQuantumNode[taskDef].add(value) 

238 self._nodeIdMap[nodeId] = value 

239 

240 for dsRef in chain(inits, inputs): 

241 # unfortunately, `Quantum` allows inits to be individual 

242 # `DatasetRef`s or an Iterable of such, so there must 

243 # be an instance check here 

244 if isinstance(dsRef, Iterable): 

245 for sub in dsRef: 

246 if sub.isComponent(): 

247 sub = sub.makeCompositeRef() 

248 self._datasetRefDict.addConsumer(sub, value) 

249 else: 

250 assert isinstance(dsRef, DatasetRef) 

251 if dsRef.isComponent(): 

252 dsRef = dsRef.makeCompositeRef() 

253 self._datasetRefDict.addConsumer(dsRef, value) 

254 for dsRef in chain.from_iterable(quantum.outputs.values()): 

255 self._datasetRefDict.addProducer(dsRef, value) 

256 

257 if pruneRefs is not None: 

258 # track what refs were pruned and prune the graph 

259 prunes: Set[QuantumNode] = set() 

260 _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes) 

261 

262 # recreate the taskToQuantumNode dict removing nodes that have been 

263 # pruned. Keep track of task defs that now have no QuantumNodes 

264 emptyTasks: Set[str] = set() 

265 newTaskToQuantumNode: DefaultDict[TaskDef, Set[QuantumNode]] = defaultdict(set) 

266 # accumulate all types 

267 types_ = set() 

268 # tracker for any pruneRefs that have caused tasks to have no nodes 

269 # This helps the user find out what caused the issues seen. 

270 culprits = set() 

271 # Find all the types from the refs to prune 

272 for r in pruneRefs: 

273 types_.add(r.datasetType) 

274 

275# For each of the tasks, and their associated nodes, remove any

276# nodes that were pruned. If there are no nodes associated

277 # with a task, record that task, and find out if that was due to 

278 # a type from an input ref to prune. 

279 for td, taskNodes in self._taskToQuantumNode.items(): 

280 diff = taskNodes.difference(prunes) 

281 if len(diff) == 0: 

282 if len(taskNodes) != 0: 

283 tp: DatasetType 

284 for tp in types_: 

285 if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set( 

286 tmpRefs 

287 ).difference(pruneRefs): 

288 culprits.add(tp.name) 

289 emptyTasks.add(td.label) 

290 newTaskToQuantumNode[td] = diff 

291 

292 # update the internal dict 

293 self._taskToQuantumNode = newTaskToQuantumNode 

294 

295 if emptyTasks: 

296 raise ValueError( 

297 f"{', '.join(emptyTasks)} task(s) have no nodes associated with them " 

298 f"after graph pruning; {', '.join(culprits)} caused over-pruning" 

299 ) 

300 

301 # Dimension universe 

302 if universe is None: 

303 raise RuntimeError( 

304 "Dimension universe or at least one quantum with a data ID " 

305 "must be provided when constructing a QuantumGraph." 

306 ) 

307 self._universe = universe 

308 

309 # Graph of quanta relations 

310 self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph() 

311 self._count = len(self._connectedQuanta) 

312 

313 # Graph of task relations, used in various methods 

314 self._taskGraph = self._datasetDict.makeNetworkXGraph() 

315 

316 # convert default dict into a regular to prevent accidental key 

317 # insertion 

318 self._taskToQuantumNode = dict(self._taskToQuantumNode.items()) 

319 

320 self._initInputRefs: Dict[TaskDef, List[DatasetRef]] = {} 

321 self._initOutputRefs: Dict[TaskDef, List[DatasetRef]] = {} 

322 self._globalInitOutputRefs: List[DatasetRef] = [] 

323 self._registryDatasetTypes: List[DatasetType] = [] 

324 if initInputs is not None: 

325 self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()} 

326 if initOutputs is not None: 

327 self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()} 

328 if globalInitOutputs is not None: 

329 self._globalInitOutputRefs = list(globalInitOutputs) 

330 if registryDatasetTypes is not None: 

331 self._registryDatasetTypes = list(registryDatasetTypes) 

332 

333 @property 

334 def taskGraph(self) -> nx.DiGraph: 

335 """A graph representing the relations between the tasks inside 

336 the quantum graph (`networkx.DiGraph`). 

337 """ 

338 return self._taskGraph 

339 

340 @property 

341 def graph(self) -> nx.DiGraph: 

342 """A graph representing the relations between all the `QuantumNode` 

343 objects (`networkx.DiGraph`). 

344 

345 The graph should usually be iterated over, or passed to methods of this 

346 class, but sometimes direct access to the ``networkx`` object may be 

347 helpful. 

348 """ 

349 return self._connectedQuanta 

350 

351 @property 

352 def inputQuanta(self) -> Iterable[QuantumNode]: 

353 """The nodes that are inputs to the graph (iterable [`QuantumNode`]). 

354 

355 These are the nodes that do not depend on any other nodes in the 

356 graph. 

357 """ 

358 return (q for q, n in self._connectedQuanta.in_degree if n == 0) 

359 

360 @property 

361 def outputQuanta(self) -> Iterable[QuantumNode]: 

362 """The nodes that are outputs of the graph (iterable [`QuantumNode`]). 

363 

364 These are the nodes that have no nodes that depend on them in the 

365 graph. 

366 """ 

367 return [q for q, n in self._connectedQuanta.out_degree if n == 0] 

368 

369 @property 

370 def allDatasetTypes(self) -> Tuple[DatasetTypeName, ...]: 

371 """All the data set type names that are present in the graph 

372 (`tuple` [`str`]). 

373 

374 These types do not include global init-outputs. 

375 """ 

376 return tuple(self._datasetDict.keys()) 

377 

378 @property 

379 def isConnected(self) -> bool: 

380 """Whether all of the nodes in the graph are connected, ignoring 

381 directionality of connections (`bool`). 

382 """ 

383 return nx.is_weakly_connected(self._connectedQuanta) 

384 
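A minimal usage sketch for the properties above (``qgraph`` is assumed to be an already-constructed `QuantumGraph`):

    # Nodes with in-degree zero start the workflow.
    for node in qgraph.inputQuanta:
        print("start:", node.taskDef.label)

    # A graph that is not weakly connected can be split into
    # independent pieces (see subsetToConnected below).
    if not qgraph.isConnected:
        pieces = qgraph.subsetToConnected()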

385 def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: 

386 r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s 

387 and nodes which depend on them. 

388 

389 Parameters 

390 ---------- 

391 refs : `Iterable` of `DatasetRef` 

392 Refs which should be removed from the resulting graph

393 

394 Returns 

395 ------- 

396 graph : `QuantumGraph` 

397 A graph that has been pruned of specified refs and the nodes that 

398 depend on them. 

399 """ 

400 newInst = object.__new__(type(self)) 

401 quantumMap = defaultdict(set) 

402 for node in self: 

403 quantumMap[node.taskDef].add(node.quantum) 

404 

405 # convert to standard dict to prevent accidental key insertion 

406 quantumDict: Dict[TaskDef, Set[Quantum]] = dict(quantumMap.items()) 

407 

408 # This should not change set of tasks in a graph, so we can keep the 

409 # same registryDatasetTypes as in the original graph. 

410 # TODO: Do we need to copy initInputs/initOutputs? 

411 newInst._buildGraphs( 

412 quantumDict, 

413 _quantumToNodeId={n.quantum: n.nodeId for n in self}, 

414 metadata=self._metadata, 

415 pruneRefs=refs, 

416 universe=self._universe, 

417 globalInitOutputs=self._globalInitOutputRefs, 

418 registryDatasetTypes=self._registryDatasetTypes, 

419 ) 

420 return newInst 

421 

422 def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: 

423 """Lookup a `QuantumNode` from an id associated with the node. 

424 

425 Parameters 

426 ---------- 

427 nodeId : `uuid.UUID`

428 The id associated with a node

429 

430 Returns 

431 ------- 

432 node : `QuantumNode` 

433 The node corresponding to the input id

434 

435 Raises 

436 ------ 

437 KeyError 

438 Raised if the requested nodeId is not in the graph. 

439 """ 

440 return self._nodeIdMap[nodeId] 

441 

442 def getQuantaForTask(self, taskDef: TaskDef) -> FrozenSet[Quantum]: 

443 """Return all the `Quantum` associated with a `TaskDef`. 

444 

445 Parameters 

446 ---------- 

447 taskDef : `TaskDef` 

448 The `TaskDef` for which `Quantum` are to be queried 

449 

450 Returns 

451 ------- 

452 frozenset of `Quantum` 

453 The `set` of `Quantum` that is associated with the specified 

454 `TaskDef`. 

455 """ 

456 return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) 

457 

458 def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: 

459 """Return all the number of `Quantum` associated with a `TaskDef`. 

460 

461 Parameters 

462 ---------- 

463 taskDef : `TaskDef` 

464 The `TaskDef` for which `Quantum` are to be queried 

465 

466 Returns 

467 ------- 

468 count : int 

469 The number of `Quantum` that are associated with the specified 

470 `TaskDef`. 

471 """ 

472 return len(self._taskToQuantumNode.get(taskDef, ())) 

473 

474 def getNodesForTask(self, taskDef: TaskDef) -> FrozenSet[QuantumNode]: 

475 """Return all the `QuantumNodes` associated with a `TaskDef`. 

476 

477 Parameters 

478 ---------- 

479 taskDef : `TaskDef` 

480 The `TaskDef` for which `Quantum` are to be queried 

481 

482 Returns 

483 ------- 

484 frozenset of `QuantumNode`

485 The `frozenset` of `QuantumNode` objects associated with the

486 specified `TaskDef`. 

487 """ 

488 return frozenset(self._taskToQuantumNode[taskDef]) 

489 

490 def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

491 """Find all tasks that have the specified dataset type name as an 

492 input. 

493 

494 Parameters 

495 ---------- 

496 datasetTypeName : `str` 

497 A string representing the name of a dataset type to be queried, 

498 can also accept a `DatasetTypeName` which is a `NewType` of str for 

499 type safety in static type checking. 

500 

501 Returns 

502 ------- 

503 tasks : iterable of `TaskDef` 

504 `TaskDef` objects that have the specified `DatasetTypeName` as an 

505 input; the list will be empty if no tasks use the specified

506 `DatasetTypeName` as an input. 

507 

508 Raises 

509 ------ 

510 KeyError 

511 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

512 """ 

513 return (c for c in self._datasetDict.getConsumers(datasetTypeName)) 

514 

515 def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> Optional[TaskDef]: 

516 """Find all tasks that have the specified dataset type name as an 

517 output. 

518 

519 Parameters 

520 ---------- 

521 datasetTypeName : `str` 

522 A string representing the name of a dataset type to be queried, 

523 can also accept a `DatasetTypeName` which is a `NewType` of str for 

524 type safety in static type checking. 

525 

526 Returns 

527 ------- 

528 `TaskDef` or `None` 

529 `TaskDef` that outputs `DatasetTypeName` as an output or None if 

530 none of the tasks produce this `DatasetTypeName`. 

531 

532 Raises 

533 ------ 

534 KeyError 

535 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

536 """ 

537 return self._datasetDict.getProducer(datasetTypeName) 

538 

539 def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

540 """Find all tasks that are associated with the specified dataset type 

541 name. 

542 

543 Parameters 

544 ---------- 

545 datasetTypeName : `str` 

546 A string representing the name of a dataset type to be queried, 

547 can also accept a `DatasetTypeName` which is a `NewType` of str for 

548 type safety in static type checking. 

549 

550 Returns 

551 ------- 

552 result : iterable of `TaskDef` 

553 `TaskDef` objects that are associated with the specified 

554 `DatasetTypeName` 

555 

556 Raises 

557 ------ 

558 KeyError 

559 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

560 """ 

561 return self._datasetDict.getAll(datasetTypeName) 

562 

563 def findTaskDefByName(self, taskName: str) -> List[TaskDef]: 

564 """Determine which `TaskDef` objects in this graph are associated 

565 with a `str` representing a task name (looks at the taskName property 

566 of `TaskDef` objects). 

567 

568 Returns a list of `TaskDef` objects as a `PipelineTask` may appear 

569 multiple times in a graph with different labels. 

570 

571 Parameters 

572 ---------- 

573 taskName : str 

574 Name of a task to search for 

575 

576 Returns 

577 ------- 

578 result : list of `TaskDef` 

579 List of the `TaskDef` objects that have the name specified. 

580 Multiple values are returned in the case that a task is used 

581 multiple times with different labels. 

582 """ 

583 results = [] 

584 for task in self._taskToQuantumNode.keys(): 

585 split = task.taskName.split(".") 

586 if split[-1] == taskName: 

587 results.append(task) 

588 return results 

589 

590 def findTaskDefByLabel(self, label: str) -> Optional[TaskDef]: 

591 """Determine which `TaskDef` objects in this graph are associated 

592 with a `str` representing a task's label.

593 

594 Parameters 

595 ---------- 

596 label : str

597 Label of a task to search for

598 

599 Returns 

600 ------- 

601 result : `TaskDef` 

602 The `TaskDef` object that has the specified label, or `None` if not found.

603 """ 

604 for task in self._taskToQuantumNode.keys(): 

605 if label == task.label: 

606 return task 

607 return None 

608 

609 def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> Set[Quantum]: 

610 """Return all the `Quantum` that contain a specified `DatasetTypeName`. 

611 

612 Parameters 

613 ---------- 

614 datasetTypeName : `str` 

615 The name of the dataset type to search for as a string, 

616 can also accept a `DatasetTypeName` which is a `NewType` of str for 

617 type safety in static type checking. 

618 

619 Returns 

620 ------- 

621 result : `set` of `Quantum` objects

622 A `set` of `Quantum` that contain the specified `DatasetTypeName`

623 

624 Raises 

625 ------ 

626 KeyError 

627 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

628 

629 """ 

630 tasks = self._datasetDict.getAll(datasetTypeName) 

631 result: Set[Quantum] = set() 

632 result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task)) 

633 return result 

634 

635 def checkQuantumInGraph(self, quantum: Quantum) -> bool: 

636 """Check if specified quantum appears in the graph as part of a node. 

637 

638 Parameters 

639 ---------- 

640 quantum : `Quantum` 

641 The quantum to search for 

642 

643 Returns 

644 ------- 

645 `bool` 

646 The result of searching for the quantum 

647 """ 

648 for node in self: 

649 if quantum == node.quantum: 

650 return True 

651 return False 

652 

653 def writeDotGraph(self, output: Union[str, io.BufferedIOBase]) -> None: 

654 """Write out the graph as a dot graph. 

655 

656 Parameters 

657 ---------- 

658 output : str or `io.BufferedIOBase` 

659 Either a filesystem path to write to, or a file handle object 

660 """ 

661 write_dot(self._connectedQuanta, output) 

662 

663 def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T: 

664 """Create a new graph object that contains the subset of the nodes 

665 specified as input. Node ids are preserved.

666 

667 Parameters 

668 ---------- 

669 nodes : `QuantumNode` or iterable of `QuantumNode` 

670 

671 Returns 

672 ------- 

673 graph : instance of graph type 

674 An instance of the type from which the subset was created 

675 """ 

676 if not isinstance(nodes, Iterable): 

677 nodes = (nodes,) 

678 quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes 

679 quantumMap = defaultdict(set) 

680 

681 dataset_type_names: set[str] = set() 

682 node: QuantumNode 

683 for node in quantumSubgraph: 

684 quantumMap[node.taskDef].add(node.quantum) 

685 dataset_type_names.update( 

686 dstype.name 

687 for dstype in chain( 

688 node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys() 

689 ) 

690 ) 

691 

692 # May need to trim dataset types from registryDatasetTypes. 

693 for taskDef in quantumMap: 

694 if refs := self.initOutputRefs(taskDef): 

695 dataset_type_names.update(ref.datasetType.name for ref in refs) 

696 dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs) 

697 registryDatasetTypes = [ 

698 dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names 

699 ] 

700 

701 # convert to standard dict to prevent accidental key insertion 

702 quantumDict: Dict[TaskDef, Set[Quantum]] = dict(quantumMap.items()) 

703 # Create an empty graph, and then populate it with custom mapping 

704 newInst = type(self)({}, universe=self._universe) 

705 # TODO: Do we need to copy initInputs/initOutputs? 

706 newInst._buildGraphs( 

707 quantumDict, 

708 _quantumToNodeId={n.quantum: n.nodeId for n in nodes}, 

709 _buildId=self._buildId, 

710 metadata=self._metadata, 

711 universe=self._universe, 

712 globalInitOutputs=self._globalInitOutputRefs, 

713 registryDatasetTypes=registryDatasetTypes, 

714 ) 

715 return newInst 

716 

717 def subsetToConnected(self: _T) -> Tuple[_T, ...]: 

718 """Generate a list of subgraphs where each is connected. 

719 

720 Returns 

721 ------- 

722 result : list of `QuantumGraph` 

723 A list of graphs that are each connected 

724 """ 

725 return tuple( 

726 self.subset(connectedSet) 

727 for connectedSet in nx.weakly_connected_components(self._connectedQuanta) 

728 ) 

729 
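For example (a sketch, assuming ``qgraph`` and a ``taskDef`` that is part of the graph), a subgraph holding only one task's quanta can be built by combining `getNodesForTask` with `subset`:

    nodes = qgraph.getNodesForTask(taskDef)
    one_task_graph = qgraph.subset(nodes)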

730 def determineInputsToQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: 

731 """Return a set of `QuantumNode` that are direct inputs to a specified 

732 node. 

733 

734 Parameters 

735 ---------- 

736 node : `QuantumNode` 

737 The node of the graph for which inputs are to be determined 

738 

739 Returns 

740 ------- 

741 set of `QuantumNode` 

742 All the nodes that are direct inputs to the specified node

743 """ 

744 return set(pred for pred in self._connectedQuanta.predecessors(node)) 

745 

746 def determineOutputsOfQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: 

747 """Return a set of `QuantumNode` that are direct outputs of a specified 

748 node. 

749 

750 Parameters 

751 ---------- 

752 node : `QuantumNode` 

753 The node of the graph for which outputs are to be determined 

754 

755 Returns 

756 ------- 

757 set of `QuantumNode` 

758 All the nodes that are direct outputs of the specified node

759 """ 

760 return set(succ for succ in self._connectedQuanta.successors(node)) 

761 

762 def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

763 """Return a graph of `QuantumNode` that are direct inputs and outputs 

764 of a specified node. 

765 

766 Parameters 

767 ---------- 

768 node : `QuantumNode` 

769 The node of the graph for which connected nodes are to be 

770 determined. 

771 

772 Returns 

773 ------- 

774 graph : graph of `QuantumNode` 

775 All the nodes that are directly connected to the specified node

776 """ 

777 nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node)) 

778 nodes.add(node) 

779 return self.subset(nodes) 

780 

781 def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

782 """Return a graph of the specified node and all the ancestor nodes 

783 directly reachable by walking edges. 

784 

785 Parameters 

786 ---------- 

787 node : `QuantumNode` 

788 The node for which all ancestors are to be determined

789 

790 Returns 

791 ------- 

792 graph of `QuantumNode` 

793 Graph of the node and all of its ancestors

794 """ 

795 predecessorNodes = nx.ancestors(self._connectedQuanta, node) 

796 predecessorNodes.add(node) 

797 return self.subset(predecessorNodes) 

798 

799 def findCycle(self) -> List[Tuple[QuantumNode, QuantumNode]]: 

800 """Check a graph for the presense of cycles and returns the edges of 

801 any cycles found, or an empty list if there is no cycle. 

802 

803 Returns 

804 ------- 

805 result : list of tuple of `QuantumNode`, `QuantumNode` 

806 A list of any graph edges that form a cycle, or an empty list if 

807 there is no cycle. An empty list is returned to support the

808 ``if graph.findCycle()`` syntax, as an empty list is falsy.

809 """ 

810 try: 

811 return nx.find_cycle(self._connectedQuanta) 

812 except nx.NetworkXNoCycle: 

813 return [] 

814 
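Because an acyclic graph yields an empty (falsy) list, the result can be used directly in a condition, e.g. (sketch):

    if cycle := qgraph.findCycle():
        raise RuntimeError(f"QuantumGraph contains a cycle: {cycle}")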

815 def saveUri(self, uri: ResourcePathExpression) -> None: 

816 """Save `QuantumGraph` to the specified URI. 

817 

818 Parameters 

819 ---------- 

820 uri : convertible to `ResourcePath` 

821 URI to where the graph should be saved. 

822 """ 

823 buffer = self._buildSaveObject() 

824 path = ResourcePath(uri) 

825 if path.getExtension() not in (".qgraph",):

826 raise TypeError(f"Can currently only save a graph in qgraph format, not {uri}")

827 path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

828 
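A save/load round trip might look like this sketch (the path is hypothetical; `loadUri` is defined below):

    qgraph.saveUri("/tmp/pipeline.qgraph")
    restored = QuantumGraph.loadUri("/tmp/pipeline.qgraph")
    assert len(restored) == len(qgraph)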

829 @property 

830 def metadata(self) -> Optional[MappingProxyType[str, Any]]: 

831 """Extra data carried with the graph (mapping [`str`] or `None`). 

832 

833 The mapping is a dynamic view of this object's metadata. Values should 

834 be able to be serialized in JSON. 

835 """ 

836 if self._metadata is None: 

837 return None 

838 return MappingProxyType(self._metadata) 

839 

840 def initInputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: 

841 """Return DatasetRefs for a given task InitInputs. 

842 

843 Parameters 

844 ---------- 

845 taskDef : `TaskDef` 

846 Task definition structure. 

847 

848 Returns 

849 ------- 

850 refs : `list` [ `DatasetRef` ] or None 

851 DatasetRefs for the task InitInputs, or `None`. The returned

852 references can be either resolved or unresolved.

853 """ 

854 return self._initInputRefs.get(taskDef) 

855 

856 def initOutputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: 

857 """Return DatasetRefs for a given task InitOutputs. 

858 

859 Parameters 

860 ---------- 

861 taskDef : `TaskDef` 

862 Task definition structure. 

863 

864 Returns 

865 ------- 

866 refs : `list` [ `DatasetRef` ] or None 

867 DatasetRefs for the task InitOutputs, or `None`. The returned

868 references can be either resolved or unresolved. A resolved reference

869 will match Quantum's initInputs if this is an intermediate dataset type.

870 """ 

871 return self._initOutputRefs.get(taskDef) 

872 

873 def globalInitOutputRefs(self) -> List[DatasetRef]: 

874 """Return DatasetRefs for global InitOutputs. 

875 

876 Returns 

877 ------- 

878 refs : `list` [ `DatasetRef` ] 

879 DatasetRefs for global InitOutputs. 

880 """ 

881 return self._globalInitOutputRefs 

882 

883 def registryDatasetTypes(self) -> List[DatasetType]: 

884 """Return dataset types used by this graph, their definitions match 

885 dataset types from registry. 

886 

887 Returns 

888 ------- 

889 refs : `list` [ `DatasetType` ] 

890 Dataset types for this graph. 

891 """ 

892 return self._registryDatasetTypes 

893 

894 @classmethod 

895 def loadUri( 

896 cls, 

897 uri: ResourcePathExpression, 

898 universe: Optional[DimensionUniverse] = None, 

899 nodes: Optional[Iterable[uuid.UUID]] = None, 

900 graphID: Optional[BuildId] = None, 

901 minimumVersion: int = 3, 

902 ) -> QuantumGraph: 

903 """Read `QuantumGraph` from a URI. 

904 

905 Parameters 

906 ---------- 

907 uri : convertible to `ResourcePath` 

908 URI from where to load the graph. 

909 universe : `~lsst.daf.butler.DimensionUniverse`, optional

910 DimensionUniverse instance, not used by the method itself but 

911 needed to ensure that registry data structures are initialized. 

912 If None it is loaded from the QuantumGraph saved structure. If 

913 supplied, the DimensionUniverse from the loaded `QuantumGraph` 

914 will be validated against the supplied argument for compatibility. 

915 nodes : iterable of `uuid.UUID` or None

916 UUIDs that correspond to nodes in the graph. If specified, only

917 these nodes will be loaded. Defaults to None, in which case all 

918 nodes will be loaded. 

919 graphID : `str` or `None` 

920 If specified this ID is verified against the loaded graph prior to 

921 loading any Nodes. This defaults to None in which case no 

922 validation is done. 

923 minimumVersion : int 

924 Minimum version of a save file to load. Set to -1 to load all 

925 versions. Older versions may need to be loaded, and re-saved 

926 to upgrade them to the latest format before they can be used in 

927 production. 

928 

929 Returns 

930 ------- 

931 graph : `QuantumGraph` 

932 Resulting QuantumGraph instance. 

933 

934 Raises 

935 ------ 

936 TypeError 

937 Raised if pickle contains instance of a type other than 

938 QuantumGraph. 

939 ValueError 

940 Raised if one or more of the nodes requested is not in the 

941 `QuantumGraph` or if graphID parameter does not match the graph 

942 being loaded or if the supplied uri does not point at a valid 

943 `QuantumGraph` save file. 

944 RuntimeError 

945 Raised if the supplied DimensionUniverse is not compatible with the

946 DimensionUniverse saved in the graph 

947 

948 

949 Notes 

950 ----- 

951 Reading Quanta from pickle requires existence of singleton 

952 DimensionUniverse which is usually instantiated during Registry 

953 initialization. To make sure that DimensionUniverse exists this method 

954 accepts dummy DimensionUniverse argument. 

955 """ 

956 uri = ResourcePath(uri) 

957 # With ResourcePath we have the choice of always using a local file 

958 # or reading in the bytes directly. Reading in bytes can be more 

959 # efficient for reasonably-sized pickle files when the resource 

960 # is remote. For now use the local file variant. For a local file 

961 # as_local() does nothing. 

962 

963 if uri.getExtension() in (".pickle", ".pkl"): 

964 with uri.as_local() as local, open(local.ospath, "rb") as fd: 

965 warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method") 

966 qgraph = pickle.load(fd) 

967 elif uri.getExtension() in (".qgraph",):

968 with LoadHelper(uri, minimumVersion) as loader: 

969 qgraph = loader.load(universe, nodes, graphID) 

970 else: 

971 raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`") 

972 if not isinstance(qgraph, QuantumGraph): 

973 raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}") 

974 return qgraph 

975 

976 @classmethod 

977 def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Optional[str]: 

978 """Read the header of a `QuantumGraph` pointed to by the uri parameter 

979 and return it as a string. 

980 

981 Parameters 

982 ---------- 

983 uri : convertible to `ResourcePath` 

984 The location of the `QuantumGraph` to load. If the argument is a 

985 string, it must correspond to a valid `ResourcePath` path. 

986 minimumVersion : int 

987 Minimum version of a save file to load. Set to -1 to load all 

988 versions. Older versions may need to be loaded, and re-saved 

989 to upgrade them to the latest format before they can be used in 

990 production. 

991 

992 Returns 

993 ------- 

994 header : `str` or `None` 

995 The header associated with the specified `QuantumGraph` if there is

996 one, else `None`. 

997 

998 Raises 

999 ------ 

1000 ValueError 

1001 Raised if `QuantumGraph` was saved as a pickle.

1002 Raised if the extension of the file specified by uri is not a

1003 `QuantumGraph` extension.

1004 """ 

1005 uri = ResourcePath(uri) 

1006 if uri.getExtension() in (".pickle", ".pkl"): 

1007 raise ValueError("Reading a header from a pickle save is not supported") 

1008 elif uri.getExtension() in (".qgraph",):

1009 return LoadHelper(uri, minimumVersion).readHeader() 

1010 else: 

1011 raise ValueError("Only know how to handle files saved as `qgraph`") 

1012 
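A sketch of inspecting a save without loading any nodes, assuming the returned header is the JSON text produced by `_buildSaveObject` (the path is hypothetical):

    header_json = QuantumGraph.readHeader("/tmp/pipeline.qgraph")
    if header_json is not None:
        header = json.loads(header_json)
        print(header["GraphBuildID"], header["Metadata"])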

1013 def buildAndPrintHeader(self) -> None: 

1014 """Creates a header that would be used in a save of this object and 

1015 prints it out to standard out. 

1016 """ 

1017 _, header = self._buildSaveObject(returnHeader=True) 

1018 print(json.dumps(header)) 

1019 

1020 def save(self, file: BinaryIO) -> None: 

1021 """Save QuantumGraph to a file. 

1022 

1023 Parameters 

1024 ---------- 

1025 file : `io.BufferedIOBase` 

1026 File to write the graph data to, opened in binary mode.

1027 """ 

1028 buffer = self._buildSaveObject() 

1029 file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

1030 

1031 def _buildSaveObject(self, returnHeader: bool = False) -> Union[bytearray, Tuple[bytearray, Dict]]: 

1032 # make some containers 

1033 jsonData: Deque[bytes] = deque() 

1034 # node map is a list because json does not accept mapping keys that 

1035 # are not strings, so we store a list of key, value pairs that will 

1036 # be converted to a mapping on load 

1037 nodeMap = [] 

1038 taskDefMap = {} 

1039 headerData: Dict[str, Any] = {} 

1040 

1041# Store the QuantumGraph BuildId; this will allow validating BuildIds

1042 # at load time, prior to loading any QuantumNodes. Name chosen for 

1043 # unlikely conflicts. 

1044 headerData["GraphBuildID"] = self.graphID 

1045 headerData["Metadata"] = self._metadata 

1046 

1047 # Store the universe this graph was created with 

1048 universeConfig = self._universe.dimensionConfig 

1049 headerData["universe"] = universeConfig.toDict() 

1050 

1051 # counter for the number of bytes processed thus far 

1052 count = 0 

1053 # serialize out the task Defs recording the start and end bytes of each 

1054 # taskDef 

1055 inverseLookup = self._datasetDict.inverse 

1056 taskDef: TaskDef 

1057 # sort by task label to ensure serialization happens in the same order 

1058 for taskDef in self.taskGraph: 

1059 # compressing has very little impact on saving or load time, but 

1060# a large impact on on-disk size, so it is worth doing

1061 taskDescription: Dict[str, Any] = {} 

1062 # save the fully qualified name. 

1063 taskDescription["taskName"] = get_full_type_name(taskDef.taskClass) 

1064 # save the config as a text stream that will be un-persisted on the 

1065 # other end 

1066 stream = io.StringIO() 

1067 taskDef.config.saveToStream(stream) 

1068 taskDescription["config"] = stream.getvalue() 

1069 taskDescription["label"] = taskDef.label 

1070 if (refs := self._initInputRefs.get(taskDef)) is not None: 

1071 taskDescription["initInputRefs"] = [ref.to_json() for ref in refs] 

1072 if (refs := self._initOutputRefs.get(taskDef)) is not None: 

1073 taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs] 

1074 

1075 inputs = [] 

1076 outputs = [] 

1077 

1078 # Determine the connection between all of tasks and save that in 

1079 # the header as a list of connections and edges in each task 

1080 # this will help in un-persisting, and possibly in a "quick view" 

1081 # method that does not require everything to be un-persisted 

1082 # 

1083 # Typing returns can't be parameter dependent 

1084 for connection in inverseLookup[taskDef]: # type: ignore 

1085 consumers = self._datasetDict.getConsumers(connection) 

1086 producer = self._datasetDict.getProducer(connection) 

1087 if taskDef in consumers: 

1088 # This checks if the task consumes the connection directly 

1089 # from the datastore or it is produced by another task 

1090 producerLabel = producer.label if producer is not None else "datastore" 

1091 inputs.append((producerLabel, connection)) 

1092 elif taskDef not in consumers and producer is taskDef: 

1093 # If there are no consumers for this tasks produced 

1094 # connection, the output will be said to be the datastore 

1095 # in which case the for loop will be a zero length loop 

1096 if not consumers: 

1097 outputs.append(("datastore", connection)) 

1098 for td in consumers: 

1099 outputs.append((td.label, connection)) 

1100 

1101 # dump to json string, and encode that string to bytes and then 

1102# compress those bytes

1103 dump = lzma.compress(json.dumps(taskDescription).encode()) 

1104 # record the sizing and relation information 

1105 taskDefMap[taskDef.label] = { 

1106 "bytes": (count, count + len(dump)), 

1107 "inputs": inputs, 

1108 "outputs": outputs, 

1109 } 

1110 count += len(dump) 

1111 jsonData.append(dump) 

1112 

1113 headerData["TaskDefs"] = taskDefMap 

1114 

1115 # serialize the nodes, recording the start and end bytes of each node 

1116 dimAccumulator = DimensionRecordsAccumulator() 

1117 for node in self: 

1118 # compressing has very little impact on saving or load time, but 

1119# a large impact on on-disk size, so it is worth doing

1120 simpleNode = node.to_simple(accumulator=dimAccumulator) 

1121 

1122 dump = lzma.compress(simpleNode.json().encode()) 

1123 jsonData.append(dump) 

1124 nodeMap.append( 

1125 ( 

1126 str(node.nodeId), 

1127 { 

1128 "bytes": (count, count + len(dump)), 

1129 "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)], 

1130 "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)], 

1131 }, 

1132 ) 

1133 ) 

1134 count += len(dump) 

1135 

1136 headerData["DimensionRecords"] = { 

1137 key: value.dict() for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items() 

1138 } 

1139 

1140 # need to serialize this as a series of key,value tuples because of 

1141# a JSON limitation: mapping keys must be strings

1142 headerData["Nodes"] = nodeMap 

1143 

1144 if self._globalInitOutputRefs: 

1145 headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs] 

1146 

1147 if self._registryDatasetTypes: 

1148 headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes] 

1149 

1150 # dump the headerData to json 

1151 header_encode = lzma.compress(json.dumps(headerData).encode()) 

1152 

1153# record the header length using the struct format for the

1154# current save version

1155 save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION) 

1156 

1157 fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING() 

1158 map_lengths = struct.pack(fmt_string, len(header_encode)) 

1159 

1160 # write each component of the save out in a deterministic order 

1161 # buffer = io.BytesIO() 

1162 # buffer.write(map_lengths) 

1163 # buffer.write(taskDef_pickle) 

1164 # buffer.write(map_pickle) 

1165 buffer = bytearray() 

1166 buffer.extend(MAGIC_BYTES) 

1167 buffer.extend(save_bytes) 

1168 buffer.extend(map_lengths) 

1169 buffer.extend(header_encode) 

1170# Iterate over the length of jsonData, and for each element pop the

1171# leftmost element off the deque and write it out. This is to save

1172# memory: as the data is added to the buffer object, it is removed

1173# from the container.

1174#

1175# Only this section needs to worry about memory pressure because

1176# everything else written to the buffer prior to this json data is

1177# only on the order of kilobytes to low numbers of megabytes.

1178 while jsonData: 

1179 buffer.extend(jsonData.popleft()) 

1180 if returnHeader: 

1181 return buffer, headerData 

1182 else: 

1183 return buffer 

1184 

1185 @classmethod 

1186 def load( 

1187 cls, 

1188 file: BinaryIO, 

1189 universe: Optional[DimensionUniverse] = None, 

1190 nodes: Optional[Iterable[uuid.UUID]] = None, 

1191 graphID: Optional[BuildId] = None, 

1192 minimumVersion: int = 3, 

1193 ) -> QuantumGraph: 

1194 """Read QuantumGraph from a file that was made by `save`. 

1195 

1196 Parameters 

1197 ---------- 

1198 file : `io.IO` of bytes 

1199 File with the saved graph data, opened in binary mode.

1200 universe: `~lsst.daf.butler.DimensionUniverse`, optional 

1201 DimensionUniverse instance, not used by the method itself but 

1202 needed to ensure that registry data structures are initialized. 

1203 If None it is loaded from the QuantumGraph saved structure. If 

1204 supplied, the DimensionUniverse from the loaded `QuantumGraph` 

1205 will be validated against the supplied argument for compatibility. 

1206 nodes : iterable of `uuid.UUID` or None

1207 UUIDs that correspond to nodes in the graph. If specified, only

1208 these nodes will be loaded. Defaults to None, in which case all 

1209 nodes will be loaded. 

1210 graphID : `str` or `None` 

1211 If specified this ID is verified against the loaded graph prior to 

1212 loading any Nodes. This defaults to None in which case no 

1213 validation is done. 

1214 minimumVersion : int 

1215 Minimum version of a save file to load. Set to -1 to load all 

1216 versions. Older versions may need to be loaded, and re-saved 

1217 to upgrade them to the latest format before they can be used in 

1218 production. 

1219 

1220 Returns 

1221 ------- 

1222 graph : `QuantumGraph` 

1223 Resulting QuantumGraph instance. 

1224 

1225 Raises 

1226 ------ 

1227 TypeError 

1228 Raised if pickle contains instance of a type other than 

1229 QuantumGraph. 

1230 ValueError 

1231 Raised if one or more of the nodes requested is not in the 

1232 `QuantumGraph` or if graphID parameter does not match the graph 

1233 being loaded, or if the supplied file does not contain a valid

1234 `QuantumGraph` save file. 

1235 

1236 Notes 

1237 ----- 

1238 Reading Quanta from pickle requires existence of singleton 

1239 DimensionUniverse which is usually instantiated during Registry 

1240 initialization. To make sure that DimensionUniverse exists this method 

1241 accepts dummy DimensionUniverse argument. 

1242 """ 

1243 # Try to see if the file handle contains pickle data, this will be 

1244 # removed in the future 

1245 try: 

1246 qgraph = pickle.load(file) 

1247 warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method") 

1248 except pickle.UnpicklingError: 

1249 with LoadHelper(file, minimumVersion) as loader: 

1250 qgraph = loader.load(universe, nodes, graphID) 

1251 if not isinstance(qgraph, QuantumGraph): 

1252 raise TypeError(f"QuantumGraph pickle file contains unexpected object type: {type(qgraph)}")

1253 return qgraph 

1254 

1255 def iterTaskGraph(self) -> Generator[TaskDef, None, None]: 

1256 """Iterate over the `taskGraph` attribute in topological order 

1257 

1258 Yields 

1259 ------ 

1260 taskDef : `TaskDef` 

1261 `TaskDef` objects in topological order 

1262 """ 

1263 yield from nx.topological_sort(self.taskGraph) 

1264 
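For example (sketch), printing the pipeline in execution order together with per-task quanta counts:

    for taskDef in qgraph.iterTaskGraph():
        print(taskDef.label, qgraph.getNumberOfQuantaForTask(taskDef))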

1265 def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None: 

1266 """Change output run and dataset ID for each output dataset. 

1267 

1268 Parameters 

1269 ---------- 

1270 run : `str` 

1271 New output run name. 

1272 metadata_key : `str` or `None` 

1273 Specifies the metadata key corresponding to the output run name to update

1274 with new run name. If `None` or if metadata is missing it is not 

1275 updated. If metadata is present but key is missing, it will be 

1276 added. 

1277 update_graph_id : `bool`, optional 

1278 If `True` then also update graph ID with a new unique value. 

1279 """ 

1280 dataset_id_map = {} 

1281 

1282 def _update_output_refs_in_place(refs: list[DatasetRef], run: str) -> None: 

1283 """Updated list of DatasetRef with new run and dataset IDs.""" 

1284 new_refs = [] 

1285 for ref in refs: 

1286 new_ref = DatasetRef(ref.datasetType, ref.dataId, run=run, conform=False) 

1287 dataset_id_map[ref.id] = new_ref.id 

1288 new_refs.append(new_ref) 

1289 refs[:] = new_refs 

1290 

1291 def _update_input_refs_in_place(refs: list[DatasetRef], run: str) -> None: 

1292 """Updated list of DatasetRef with IDs from dataset_id_map.""" 

1293 new_refs = [] 

1294 for ref in refs: 

1295 if (new_id := dataset_id_map.get(ref.id)) is not None: 

1296 new_ref = DatasetRef(ref.datasetType, ref.dataId, id=new_id, run=run, conform=False) 

1297 new_refs.append(new_ref) 

1298 else: 

1299 new_refs.append(ref) 

1300 refs[:] = new_refs 

1301 

1302 # Loop through all outputs and update their datasets. 

1303 for node in self._connectedQuanta: 

1304 for refs in node.quantum.outputs.values(): 

1305 _update_output_refs_in_place(refs, run) 

1306 

1307 for refs in self._initOutputRefs.values(): 

1308 _update_output_refs_in_place(refs, run) 

1309 

1310 _update_output_refs_in_place(self._globalInitOutputRefs, run) 

1311 

1312 # Update all intermediates from their matching outputs. 

1313 for node in self._connectedQuanta: 

1314 for refs in node.quantum.inputs.values(): 

1315 _update_input_refs_in_place(refs, run) 

1316 

1317 for refs in self._initInputRefs.values(): 

1318 _update_input_refs_in_place(refs, run) 

1319 

1320 if update_graph_id: 

1321 self._buildId = BuildId(f"{time.time()}-{os.getpid()}") 

1322 

1323 # Update metadata if present. 

1324 if self._metadata is not None and metadata_key is not None: 

1325 metadata = dict(self._metadata) 

1326 metadata[metadata_key] = run 

1327 self._metadata = metadata 

1328 
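A typical retargeting call might look like this sketch (the run name and metadata key are hypothetical):

    qgraph.updateRun("u/someone/new_run", metadata_key="output_run", update_graph_id=True)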

1329 @property 

1330 def graphID(self) -> BuildId: 

1331 """The ID generated by the graph at construction time (`str`).""" 

1332 return self._buildId 

1333 

1334 @property 

1335 def universe(self) -> DimensionUniverse: 

1336 """Dimension universe associated with this graph 

1337 (`~lsst.daf.butler.DimensionUniverse`). 

1338 """ 

1339 return self._universe 

1340 

1341 def __iter__(self) -> Generator[QuantumNode, None, None]: 

1342 yield from nx.topological_sort(self._connectedQuanta) 

1343 

1344 def __len__(self) -> int: 

1345 return self._count 

1346 

1347 def __contains__(self, node: QuantumNode) -> bool: 

1348 return self._connectedQuanta.has_node(node) 

1349 

1350 def __getstate__(self) -> dict: 

1351 """Stores a compact form of the graph as a list of graph nodes, and a 

1352 tuple of task labels and task configs. The full graph can be 

1353 reconstructed with this information, and it preserves the ordering of 

1354 the graph nodes. 

1355 """ 

1356 universe: Optional[DimensionUniverse] = None 

1357 for node in self: 

1358 dId = node.quantum.dataId 

1359 if dId is None: 

1360 continue 

1361 universe = dId.graph.universe 

1362 return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe} 

1363 

1364 def __setstate__(self, state: dict) -> None: 

1365 """Reconstructs the state of the graph from the information persisted 

1366 in getstate. 

1367 """ 

1368 buffer = io.BytesIO(state["reduced"]) 

1369 with LoadHelper(buffer, minimumVersion=3) as loader: 

1370 qgraph = loader.load(state["universe"], graphID=state["graphId"]) 

1371 

1372 self._metadata = qgraph._metadata 

1373 self._buildId = qgraph._buildId 

1374 self._datasetDict = qgraph._datasetDict 

1375 self._nodeIdMap = qgraph._nodeIdMap 

1376 self._count = len(qgraph) 

1377 self._taskToQuantumNode = qgraph._taskToQuantumNode 

1378 self._taskGraph = qgraph._taskGraph 

1379 self._connectedQuanta = qgraph._connectedQuanta 

1380 self._initInputRefs = qgraph._initInputRefs 

1381 self._initOutputRefs = qgraph._initOutputRefs 

1382 

1383 def __eq__(self, other: object) -> bool: 

1384 if not isinstance(other, QuantumGraph): 

1385 return False 

1386 if len(self) != len(other): 

1387 return False 

1388 for node in self: 

1389 if node not in other: 

1390 return False 

1391 if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node): 

1392 return False 

1393 if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node): 

1394 return False 

1395 if set(self.allDatasetTypes) != set(other.allDatasetTypes): 

1396 return False 

1397 return set(self.taskGraph) == set(other.taskGraph)