Coverage for python/lsst/pipe/base/graph/graph.py: 16%

417 statements  

coverage.py v7.2.7, created at 2023-07-12 11:14 -0700

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QuantumGraph", "IncompatibleGraphError")

import io
import json
import lzma
import os
import pickle
import struct
import time
import uuid
import warnings
from collections import defaultdict, deque
from collections.abc import Generator, Iterable, Mapping, MutableMapping
from itertools import chain
from types import MappingProxyType
from typing import Any, BinaryIO, TypeVar

import networkx as nx
from lsst.daf.butler import DatasetRef, DatasetType, DimensionRecordsAccumulator, DimensionUniverse, Quantum
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name
from networkx.drawing.nx_agraph import write_dot

from ..connections import iterConnections
from ..pipeline import TaskDef
from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner
from ._loadHelpers import LoadHelper
from ._versionDeserializers import DESERIALIZER_MAP
from .quantumNode import BuildId, QuantumNode

_T = TypeVar("_T", bound="QuantumGraph")

# Modify this constant any time the on-disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format of the preamble bytes in a file save.
# The base is a big-endian encoded unsigned short that is used to hold the
# file format version. This allows reading the version bytes to determine
# which loading code should be used for the rest of the file.
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big-endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream.
# Version 2
# A big-endian encoded format with an unsigned long long used to indicate the
# total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# Magic bytes that help determine this is a graph save.
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"
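As a hedged illustration of the preamble layout described above, the sketch below shows how the first few bytes of a save file could be inspected by hand; the file name is hypothetical, and the real logic lives in the version deserializers:

import struct

# Minimal sketch, assuming a file written with the layout above: the magic
# bytes, then a big-endian unsigned short holding the save format version.
with open("example.qgraph", "rb") as fd:  # hypothetical file name
    magic = fd.read(len(MAGIC_BYTES))
    if magic != MAGIC_BYTES:
        raise ValueError("Not a QuantumGraph save file")
    (version,) = struct.unpack(STRUCT_FMT_BASE, fd.read(struct.calcsize(STRUCT_FMT_BASE)))
    print(f"QuantumGraph save format version: {version}")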

class IncompatibleGraphError(Exception):
    """Exception class to indicate that a lookup by NodeId is impossible due
    to incompatibilities.
    """

    pass


class QuantumGraph:
    """QuantumGraph is a directed acyclic graph of `QuantumNode` objects.

    This data structure represents a concrete workflow generated from a
    `Pipeline`.

    Parameters
    ----------
    quanta : `~collections.abc.Mapping` [ `TaskDef`, \
            `set` [ `~lsst.daf.butler.Quantum` ] ]
        This maps tasks (and their configs) to the sets of data they are to
        process.
    metadata : `~collections.abc.Mapping` of `str` to primitives, optional
        This is an optional parameter of extra data to carry with the graph.
        Entries in this mapping should be able to be serialized in JSON.
    pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Set of dataset refs to exclude from a graph.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        The dimensions in which quanta can be defined. Need only be provided
        if no quanta have data IDs.
    initInputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitInput dataset refs. Dataset refs can be
        either resolved or non-resolved. Presently the same dataset refs are
        included in each `~lsst.daf.butler.Quantum` for the same task.
    initOutputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitOutput dataset refs. Dataset refs can be
        either resolved or non-resolved. For intermediate resolved refs their
        dataset ID must match ``initInputs`` and Quantum ``initInputs``.
    globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Dataset refs for some global objects produced by the pipeline. These
        objects include task configurations and package versions. Typically
        they have an empty DataId, but there is no real restriction on what
        can appear here.
    registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \
            optional
        Dataset types which are used by this graph; their definitions must
        match the registry. If the registry does not define a dataset type
        yet, then it should match one that will be created later.

    Raises
    ------
    ValueError
        Raised if the graph is pruned such that some tasks no longer have
        nodes associated with them.
    """

    def __init__(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ):
        self._buildGraphs(
            quanta,
            metadata=metadata,
            pruneRefs=pruneRefs,
            universe=universe,
            initInputs=initInputs,
            initOutputs=initOutputs,
            globalInitOutputs=globalInitOutputs,
            registryDatasetTypes=registryDatasetTypes,
        )

    def _buildGraphs(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        *,
        _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
        _buildId: BuildId | None = None,
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        """Build the graph that is used to store the relations between tasks,
        and the graph that holds the relations between quanta.
        """
        self._metadata = metadata
        self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
        # Data structures used to identify relations between components;
        # DatasetTypeName -> TaskDef for the tasks,
        # and DatasetRef -> QuantumNode for the quanta.
        self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True)
        self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]()

        self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
        self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
        for taskDef, quantumSet in quanta.items():
            connections = taskDef.connections

            # For each type of connection in the task, add a key to the
            # `_DatasetTracker` for the connection's name, with a value of
            # the TaskDef in the appropriate field.
            for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
                # Have to handle components in inputs.
                dataset_name, _, _ = inpt.name.partition(".")
                self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)

            for output in iterConnections(connections, ("outputs",)):
                # Have to handle possible components in outputs.
                dataset_name, _, _ = output.name.partition(".")
                self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)

            # For each `Quantum` in the set of all `Quantum` for this task,
            # add a key to the `_DatasetTracker` that is a `DatasetRef` for
            # one of the individual datasets inside the `Quantum`, with a
            # value of a newly created QuantumNode in the appropriate
            # input/output field.
            for quantum in quantumSet:
                if quantum.dataId is not None:
                    if universe is None:
                        universe = quantum.dataId.universe
                    elif universe != quantum.dataId.universe:
                        raise RuntimeError(
                            "Mismatched dimension universes in QuantumGraph construction: "
                            f"{universe} != {quantum.dataId.universe}."
                        )

                if _quantumToNodeId:
                    if (nodeId := _quantumToNodeId.get(quantum)) is None:
                        raise ValueError(
                            "If _quantumToNodeId is not None, all quanta must have an "
                            "associated value in the mapping"
                        )
                else:
                    nodeId = uuid.uuid4()

                inits = quantum.initInputs.values()
                inputs = quantum.inputs.values()
                value = QuantumNode(quantum, taskDef, nodeId)
                self._taskToQuantumNode[taskDef].add(value)
                self._nodeIdMap[nodeId] = value

                for dsRef in chain(inits, inputs):
                    # Unfortunately, `Quantum` allows inits to be individual
                    # `DatasetRef`s or an Iterable of such, so there must
                    # be an instance check here.
                    if isinstance(dsRef, Iterable):
                        for sub in dsRef:
                            if sub.isComponent():
                                sub = sub.makeCompositeRef()
                            self._datasetRefDict.addConsumer(sub, value)
                    else:
                        assert isinstance(dsRef, DatasetRef)
                        if dsRef.isComponent():
                            dsRef = dsRef.makeCompositeRef()
                        self._datasetRefDict.addConsumer(dsRef, value)
                for dsRef in chain.from_iterable(quantum.outputs.values()):
                    self._datasetRefDict.addProducer(dsRef, value)

        if pruneRefs is not None:
            # Track which refs were pruned and prune the graph.
            prunes: set[QuantumNode] = set()
            _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes)

            # Recreate the taskToQuantumNode dict, removing nodes that have
            # been pruned. Keep track of task defs that now have no
            # QuantumNodes.
            emptyTasks: set[str] = set()
            newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set)
            # Accumulate all types.
            types_ = set()
            # Tracker for any pruneRefs that have caused tasks to have no
            # nodes. This helps the user find out what caused the issues
            # seen.
            culprits = set()
            # Find all the types from the refs to prune.
            for r in pruneRefs:
                types_.add(r.datasetType)

            # For each of the tasks, and their associated nodes, remove any
            # nodes that were pruned. If there are no nodes associated
            # with a task, record that task, and find out if that was due to
            # a type from an input ref to prune.
            for td, taskNodes in self._taskToQuantumNode.items():
                diff = taskNodes.difference(prunes)
                if len(diff) == 0:
                    if len(taskNodes) != 0:
                        tp: DatasetType
                        for tp in types_:
                            if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set(
                                tmpRefs
                            ).difference(pruneRefs):
                                culprits.add(tp.name)
                    emptyTasks.add(td.label)
                newTaskToQuantumNode[td] = diff

            # Update the internal dict.
            self._taskToQuantumNode = newTaskToQuantumNode

            if emptyTasks:
                raise ValueError(
                    f"{', '.join(emptyTasks)} task(s) have no nodes associated with them "
                    f"after graph pruning; {', '.join(culprits)} caused over-pruning"
                )

        # Dimension universe
        if universe is None:
            raise RuntimeError(
                "Dimension universe or at least one quantum with a data ID "
                "must be provided when constructing a QuantumGraph."
            )
        self._universe = universe

        # Graph of quanta relations
        self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph()
        self._count = len(self._connectedQuanta)

        # Graph of task relations, used in various methods
        self._taskGraph = self._datasetDict.makeNetworkXGraph()

        # Convert the default dict into a regular dict to prevent accidental
        # key insertion.
        self._taskToQuantumNode = dict(self._taskToQuantumNode.items())

        self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._globalInitOutputRefs: list[DatasetRef] = []
        self._registryDatasetTypes: list[DatasetType] = []
        if initInputs is not None:
            self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
        if initOutputs is not None:
            self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
        if globalInitOutputs is not None:
            self._globalInitOutputRefs = list(globalInitOutputs)
        if registryDatasetTypes is not None:
            self._registryDatasetTypes = list(registryDatasetTypes)

    @property
    def taskGraph(self) -> nx.DiGraph:
        """A graph representing the relations between the tasks inside
        the quantum graph (`networkx.DiGraph`).
        """
        return self._taskGraph

    @property
    def graph(self) -> nx.DiGraph:
        """A graph representing the relations between all the `QuantumNode`
        objects (`networkx.DiGraph`).

        The graph should usually be iterated over, or passed to methods of
        this class, but sometimes direct access to the ``networkx`` object
        may be helpful.
        """
        return self._connectedQuanta

    @property
    def inputQuanta(self) -> Iterable[QuantumNode]:
        """The nodes that are inputs to the graph (iterable [`QuantumNode`]).

        These are the nodes that do not depend on any other nodes in the
        graph.
        """
        return (q for q, n in self._connectedQuanta.in_degree if n == 0)

    @property
    def outputQuanta(self) -> Iterable[QuantumNode]:
        """The nodes that are outputs of the graph
        (iterable [`QuantumNode`]).

        These are the nodes that have no nodes that depend on them in the
        graph.
        """
        return [q for q, n in self._connectedQuanta.out_degree if n == 0]
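The two properties above lean on ``networkx`` degree views: a node with in-degree zero has no producers inside the graph, and a node with out-degree zero has no consumers downstream. A minimal, self-contained sketch of the same selection on a toy DiGraph (the node names are made up for illustration):

import networkx as nx

# Toy workflow: a -> b -> c, plus a second producer d -> c.
g = nx.DiGraph()
g.add_edges_from([("a", "b"), ("b", "c"), ("d", "c")])

roots = [node for node, degree in g.in_degree if degree == 0]
leaves = [node for node, degree in g.out_degree if degree == 0]
print(roots)   # ['a', 'd'] -- nothing in the graph produces these
print(leaves)  # ['c'] -- nothing in the graph consumes this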

    @property
    def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]:
        """All the dataset type names that are present in the graph
        (`tuple` [`str`]).

        These types do not include global init-outputs.
        """
        return tuple(self._datasetDict.keys())

    @property
    def isConnected(self) -> bool:
        """Whether all of the nodes in the graph are connected, ignoring
        directionality of connections (`bool`).
        """
        return nx.is_weakly_connected(self._connectedQuanta)

    def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T:
        r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s
        and nodes which depend on them.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef`
            Refs which should be removed from the resulting graph.

        Returns
        -------
        graph : `QuantumGraph`
            A graph that has been pruned of specified refs and the nodes that
            depend on them.
        """
        newInst = object.__new__(type(self))
        quantumMap = defaultdict(set)
        for node in self:
            quantumMap[node.taskDef].add(node.quantum)

        # Convert to a standard dict to prevent accidental key insertion.
        quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())

        # This should not change the set of tasks in the graph, so we can
        # keep the same registryDatasetTypes as in the original graph.
        # TODO: Do we need to copy initInputs/initOutputs?
        newInst._buildGraphs(
            quantumDict,
            _quantumToNodeId={n.quantum: n.nodeId for n in self},
            metadata=self._metadata,
            pruneRefs=refs,
            universe=self._universe,
            globalInitOutputs=self._globalInitOutputRefs,
            registryDatasetTypes=self._registryDatasetTypes,
        )
        return newInst

    def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode:
        """Lookup a `QuantumNode` from an id associated with the node.

        Parameters
        ----------
        nodeId : `uuid.UUID`
            The unique identifier associated with a node.

        Returns
        -------
        node : `QuantumNode`
            The node corresponding to the input identifier.

        Raises
        ------
        KeyError
            Raised if the requested nodeId is not in the graph.
        """
        return self._nodeIdMap[nodeId]

    def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]:
        """Return all the `~lsst.daf.butler.Quantum` associated with a
        `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
            queried.

        Returns
        -------
        quanta : `frozenset` of `~lsst.daf.butler.Quantum`
            The `frozenset` of `~lsst.daf.butler.Quantum` that is associated
            with the specified `TaskDef`.
        """
        return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ()))

    def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int:
        """Return the number of `~lsst.daf.butler.Quantum` associated with
        a `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be
            queried.

        Returns
        -------
        count : `int`
            The number of `~lsst.daf.butler.Quantum` that are associated with
            the specified `TaskDef`.
        """
        return len(self._taskToQuantumNode.get(taskDef, ()))

    def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]:
        r"""Return all the `QuantumNode`\s associated with a `TaskDef`.

        Parameters
        ----------
        taskDef : `TaskDef`
            The `TaskDef` for which `QuantumNode`\s are to be queried.

        Returns
        -------
        nodes : `frozenset` [ `QuantumNode` ]
            A `frozenset` of `QuantumNode` that is associated with the
            specified `TaskDef`.
        """
        return frozenset(self._taskToQuantumNode[taskDef])

    def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
        """Find all tasks that have the specified dataset type name as an
        input.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried;
            can also accept a `DatasetTypeName`, which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        tasks : iterable of `TaskDef`
            `TaskDef` objects that have the specified `DatasetTypeName` as an
            input; the iterable will be empty if no tasks use the specified
            `DatasetTypeName` as an input.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the
            `QuantumGraph`.
        """
        return (c for c in self._datasetDict.getConsumers(datasetTypeName))

    def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None:
        """Find the task that has the specified dataset type name as an
        output.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried;
            can also accept a `DatasetTypeName`, which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : `TaskDef` or `None`
            The `TaskDef` that produces `DatasetTypeName` as an output, or
            `None` if none of the tasks produce this `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the
            `QuantumGraph`.
        """
        return self._datasetDict.getProducer(datasetTypeName)

    def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]:
        """Find all tasks that are associated with the specified dataset type
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            A string representing the name of a dataset type to be queried;
            can also accept a `DatasetTypeName`, which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : iterable of `TaskDef`
            `TaskDef` objects that are associated with the specified
            `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the
            `QuantumGraph`.
        """
        return self._datasetDict.getAll(datasetTypeName)

    def findTaskDefByName(self, taskName: str) -> list[TaskDef]:
        """Determine which `TaskDef` objects in this graph are associated
        with a `str` representing a task name (looks at the ``taskName``
        property of `TaskDef` objects).

        Returns a list of `TaskDef` objects, as a `PipelineTask` may appear
        multiple times in a graph with different labels.

        Parameters
        ----------
        taskName : `str`
            Name of a task to search for.

        Returns
        -------
        result : `list` of `TaskDef`
            List of the `TaskDef` objects that have the name specified.
            Multiple values are returned in the case that a task is used
            multiple times with different labels.
        """
        results = []
        for task in self._taskToQuantumNode.keys():
            split = task.taskName.split(".")
            if split[-1] == taskName:
                results.append(task)
        return results

    def findTaskDefByLabel(self, label: str) -> TaskDef | None:
        """Determine which `TaskDef` object in this graph is associated
        with a `str` representing a task's label.

        Parameters
        ----------
        label : `str`
            Label of a task to search for.

        Returns
        -------
        result : `TaskDef` or `None`
            The `TaskDef` object that has the specified label, or `None` if
            there is no such task.
        """
        for task in self._taskToQuantumNode.keys():
            if label == task.label:
                return task
        return None

    def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]:
        r"""Return all the `~lsst.daf.butler.Quantum` that contain a
        specified `DatasetTypeName`.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type to search for as a string;
            can also accept a `DatasetTypeName`, which is a `~typing.NewType`
            of `str` for type safety in static type checking.

        Returns
        -------
        result : `set` of `~lsst.daf.butler.Quantum`
            A `set` of `~lsst.daf.butler.Quantum` that contain the specified
            `DatasetTypeName`.

        Raises
        ------
        KeyError
            Raised if the `DatasetTypeName` is not part of the
            `QuantumGraph`.
        """
        tasks = self._datasetDict.getAll(datasetTypeName)
        result: set[Quantum] = set()
        result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task))
        return result

    def checkQuantumInGraph(self, quantum: Quantum) -> bool:
        """Check if the specified quantum appears in the graph as part of a
        node.

        Parameters
        ----------
        quantum : `lsst.daf.butler.Quantum`
            The quantum to search for.

        Returns
        -------
        in_graph : `bool`
            The result of searching for the quantum.
        """
        for node in self:
            if quantum == node.quantum:
                return True
        return False

    def writeDotGraph(self, output: str | io.BufferedIOBase) -> None:
        """Write out the graph as a dot graph.

        Parameters
        ----------
        output : `str` or `io.BufferedIOBase`
            Either a filesystem path to write to, or a file handle object.
        """
        write_dot(self._connectedQuanta, output)

    def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T:
        """Create a new graph object that contains the subset of the nodes
        specified as input. Node number is preserved.

        Parameters
        ----------
        nodes : `QuantumNode` or iterable of `QuantumNode`
            Nodes from which to create the subset.

        Returns
        -------
        graph : instance of graph type
            An instance of the type from which the subset was created.
        """
        if not isinstance(nodes, Iterable):
            nodes = (nodes,)
        quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes
        quantumMap = defaultdict(set)

        dataset_type_names: set[str] = set()
        node: QuantumNode
        for node in quantumSubgraph:
            quantumMap[node.taskDef].add(node.quantum)
            dataset_type_names.update(
                dstype.name
                for dstype in chain(
                    node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys()
                )
            )

        # May need to trim dataset types from registryDatasetTypes.
        for taskDef in quantumMap:
            if refs := self.initOutputRefs(taskDef):
                dataset_type_names.update(ref.datasetType.name for ref in refs)
        dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs)
        registryDatasetTypes = [
            dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names
        ]

        # Convert to a standard dict to prevent accidental key insertion.
        quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())
        # Create an empty graph, and then populate it with the custom
        # mapping.
        newInst = type(self)({}, universe=self._universe)
        # TODO: Do we need to copy initInputs/initOutputs?
        newInst._buildGraphs(
            quantumDict,
            _quantumToNodeId={n.quantum: n.nodeId for n in nodes},
            _buildId=self._buildId,
            metadata=self._metadata,
            universe=self._universe,
            globalInitOutputs=self._globalInitOutputRefs,
            registryDatasetTypes=registryDatasetTypes,
        )
        return newInst

    def subsetToConnected(self: _T) -> tuple[_T, ...]:
        """Generate a tuple of subgraphs where each is connected.

        Returns
        -------
        result : `tuple` of `QuantumGraph`
            A tuple of graphs that are each connected.
        """
        return tuple(
            self.subset(connectedSet)
            for connectedSet in nx.weakly_connected_components(self._connectedQuanta)
        )
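``subsetToConnected`` relies on ``networkx.weakly_connected_components``, which partitions a directed graph into groups of nodes that are connected once edge direction is ignored. A hedged, toy-sized illustration of that partitioning (node names invented for the example):

import networkx as nx

# Two independent pipelines in one DiGraph: a->b and c->d.
g = nx.DiGraph([("a", "b"), ("c", "d")])

components = list(nx.weakly_connected_components(g))
print(components)  # [{'a', 'b'}, {'c', 'd'}]
# Each component set can then be handed to subset() to build an
# independently executable piece of the workflow.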

    def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
        """Return a set of `QuantumNode` that are direct inputs to a
        specified node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which inputs are to be determined.

        Returns
        -------
        inputs : `set` of `QuantumNode`
            All the nodes that are direct inputs to the specified node.
        """
        return set(pred for pred in self._connectedQuanta.predecessors(node))

    def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]:
        """Return a set of `QuantumNode` that are direct outputs of a
        specified node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which outputs are to be determined.

        Returns
        -------
        outputs : `set` of `QuantumNode`
            All the nodes that are direct outputs of the specified node.
        """
        return set(succ for succ in self._connectedQuanta.successors(node))

    def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
        """Return a graph of `QuantumNode` that are direct inputs and outputs
        of a specified node.

        Parameters
        ----------
        node : `QuantumNode`
            The node of the graph for which connected nodes are to be
            determined.

        Returns
        -------
        graph : graph of `QuantumNode`
            All the nodes that are directly connected to the specified node.
        """
        nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node))
        nodes.add(node)
        return self.subset(nodes)

    def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T:
        """Return a graph of the specified node and all the ancestor nodes
        directly reachable by walking edges.

        Parameters
        ----------
        node : `QuantumNode`
            The node for which all ancestors are to be determined.

        Returns
        -------
        ancestors : graph of `QuantumNode`
            Graph of the node and all of its ancestors.
        """
        predecessorNodes = nx.ancestors(self._connectedQuanta, node)
        predecessorNodes.add(node)
        return self.subset(predecessorNodes)

    def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]:
        """Check the graph for the presence of cycles and return the edges
        of any cycles found, or an empty list if there is no cycle.

        Returns
        -------
        result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ]
            A list of any graph edges that form a cycle, or an empty list if
            there is no cycle. An empty list is returned, rather than
            raising, so that the ``if graph.findCycle()`` idiom works, as an
            empty list is falsy.
        """
        try:
            return nx.find_cycle(self._connectedQuanta)
        except nx.NetworkXNoCycle:
            return []
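The try/except pattern above converts ``networkx``'s exception-based cycle API into the falsy-empty-list idiom the docstring describes. A small sketch of both outcomes on throwaway graphs:

import networkx as nx

acyclic = nx.DiGraph([("a", "b"), ("b", "c")])
cyclic = nx.DiGraph([("a", "b"), ("b", "a")])

def find_cycle_or_empty(g: nx.DiGraph) -> list:
    # Mirror of findCycle: swallow NetworkXNoCycle and return [].
    try:
        return nx.find_cycle(g)
    except nx.NetworkXNoCycle:
        return []

print(find_cycle_or_empty(acyclic))  # [] -- falsy, so `if ...:` skips it
print(find_cycle_or_empty(cyclic))   # [('a', 'b'), ('b', 'a')]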

    def saveUri(self, uri: ResourcePathExpression) -> None:
        """Save the `QuantumGraph` to the specified URI.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            URI to where the graph should be saved.
        """
        buffer = self._buildSaveObject()
        path = ResourcePath(uri)
        # Note the one-element tuple; a bare (".qgraph") would be a plain
        # string and `not in` would test substring membership instead.
        if path.getExtension() not in (".qgraph",):
            raise TypeError(f"Can currently only save a graph in qgraph format, not {uri}")
        path.write(buffer)  # type: ignore # Ignore because bytearray is safe to use in place of bytes

    @property
    def metadata(self) -> MappingProxyType[str, Any] | None:
        """Extra data carried with the graph (mapping [`str`] or `None`).

        The mapping is a dynamic view of this object's metadata. Values
        should be able to be serialized in JSON.
        """
        if self._metadata is None:
            return None
        return MappingProxyType(self._metadata)
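``MappingProxyType`` gives callers a live, read-only view rather than a copy: mutations of the underlying dict show through the proxy, but writes through the proxy raise. A minimal sketch of that behaviour (the metadata contents are hypothetical):

from types import MappingProxyType

meta = {"output_run": "u/example/run1"}  # hypothetical metadata contents
view = MappingProxyType(meta)

meta["output_run"] = "u/example/run2"
print(view["output_run"])  # 'u/example/run2' -- the view tracks the dict

try:
    view["output_run"] = "oops"  # writes through the proxy are rejected
except TypeError as err:
    print(err)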

    def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
        """Return DatasetRefs for a given task's InitInputs.

        Parameters
        ----------
        taskDef : `TaskDef`
            Task definition structure.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
            DatasetRefs for the task InitInputs, or `None`. This can return
            either resolved or non-resolved references.
        """
        return self._initInputRefs.get(taskDef)

    def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
        """Return DatasetRefs for a given task's InitOutputs.

        Parameters
        ----------
        taskDef : `TaskDef`
            Task definition structure.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
            DatasetRefs for the task InitOutputs, or `None`. This can return
            either resolved or non-resolved references. A resolved reference
            will match the Quantum's initInputs if this is an intermediate
            dataset type.
        """
        return self._initOutputRefs.get(taskDef)

    def globalInitOutputRefs(self) -> list[DatasetRef]:
        """Return DatasetRefs for global InitOutputs.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetRef` ]
            DatasetRefs for global InitOutputs.
        """
        return self._globalInitOutputRefs

    def registryDatasetTypes(self) -> list[DatasetType]:
        """Return dataset types used by this graph; their definitions match
        dataset types from the registry.

        Returns
        -------
        refs : `list` [ `~lsst.daf.butler.DatasetType` ]
            Dataset types for this graph.
        """
        return self._registryDatasetTypes

    @classmethod
    def loadUri(
        cls,
        uri: ResourcePathExpression,
        universe: DimensionUniverse | None = None,
        nodes: Iterable[uuid.UUID] | None = None,
        graphID: BuildId | None = None,
        minimumVersion: int = 3,
    ) -> QuantumGraph:
        """Read a `QuantumGraph` from a URI.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            URI from where to load the graph.
        universe : `~lsst.daf.butler.DimensionUniverse`, optional
            `~lsst.daf.butler.DimensionUniverse` instance, not used by the
            method itself but needed to ensure that registry data structures
            are initialized. If `None` it is loaded from the `QuantumGraph`
            saved structure. If supplied, the
            `~lsst.daf.butler.DimensionUniverse` from the loaded
            `QuantumGraph` will be validated against the supplied argument
            for compatibility.
        nodes : iterable of `uuid.UUID` or `None`
            UUIDs that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to None, in which case all
            nodes will be loaded.
        graphID : `str` or `None`
            If specified, this ID is verified against the loaded graph prior
            to loading any nodes. This defaults to None, in which case no
            validation is done.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if the pickle contains an instance of a type other than
            `QuantumGraph`.
        ValueError
            Raised if one or more of the nodes requested is not in the
            `QuantumGraph`, or if the graphID parameter does not match the
            graph being loaded, or if the supplied uri does not point at a
            valid `QuantumGraph` save file.
        RuntimeError
            Raised if the supplied `~lsst.daf.butler.DimensionUniverse` is
            not compatible with the `~lsst.daf.butler.DimensionUniverse`
            saved in the graph.

        Notes
        -----
        Reading Quanta from pickle requires the existence of a singleton
        `~lsst.daf.butler.DimensionUniverse` which is usually instantiated
        during `~lsst.daf.butler.Registry` initialization. To make sure
        that a `~lsst.daf.butler.DimensionUniverse` exists this method
        accepts a dummy `~lsst.daf.butler.DimensionUniverse` argument.
        """
        uri = ResourcePath(uri)
        # With ResourcePath we have the choice of always using a local file
        # or reading in the bytes directly. Reading in bytes can be more
        # efficient for reasonably-sized pickle files when the resource
        # is remote. For now use the local file variant. For a local file
        # as_local() does nothing.

        if uri.getExtension() in (".pickle", ".pkl"):
            with uri.as_local() as local, open(local.ospath, "rb") as fd:
                warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
                qgraph = pickle.load(fd)
        elif uri.getExtension() in (".qgraph",):
            with LoadHelper(uri, minimumVersion) as loader:
                qgraph = loader.load(universe, nodes, graphID)
        else:
            raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`")
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}")
        return qgraph

    @classmethod
    def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None:
        """Read the header of a `QuantumGraph` pointed to by the uri
        parameter and return it as a string.

        Parameters
        ----------
        uri : convertible to `~lsst.resources.ResourcePath`
            The location of the `QuantumGraph` to load. If the argument is a
            string, it must correspond to a valid
            `~lsst.resources.ResourcePath` path.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        header : `str` or `None`
            The header associated with the specified `QuantumGraph` if there
            is one, else `None`.

        Raises
        ------
        ValueError
            Raised if the `QuantumGraph` was saved as a pickle, or if the
            extension of the file specified by uri is not a `QuantumGraph`
            extension.
        """
        uri = ResourcePath(uri)
        if uri.getExtension() in (".pickle", ".pkl"):
            raise ValueError("Reading a header from a pickle save is not supported")
        elif uri.getExtension() in (".qgraph",):
            return LoadHelper(uri, minimumVersion).readHeader()
        else:
            raise ValueError("Only know how to handle files saved as `qgraph`")

    def buildAndPrintHeader(self) -> None:
        """Create the header that would be used in a save of this object and
        print it out to standard out.
        """
        _, header = self._buildSaveObject(returnHeader=True)
        print(json.dumps(header))

    def save(self, file: BinaryIO) -> None:
        """Save the QuantumGraph to a file.

        Parameters
        ----------
        file : `io.BufferedIOBase`
            File to write serialized graph data to, opened in binary mode.
        """
        buffer = self._buildSaveObject()
        file.write(buffer)  # type: ignore # Ignore because bytearray is safe to use in place of bytes

    def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
        # Make some containers.
        jsonData: deque[bytes] = deque()
        # The node map is a list because json does not accept mapping keys
        # that are not strings, so we store a list of key, value pairs that
        # will be converted to a mapping on load.
        nodeMap = []
        taskDefMap = {}
        headerData: dict[str, Any] = {}

        # Store the QuantumGraph BuildId; this will allow validating BuildIds
        # at load time, prior to loading any QuantumNodes. Name chosen for
        # unlikely conflicts.
        headerData["GraphBuildID"] = self.graphID
        headerData["Metadata"] = self._metadata

        # Store the universe this graph was created with.
        universeConfig = self._universe.dimensionConfig
        headerData["universe"] = universeConfig.toDict()

        # Counter for the number of bytes processed thus far.
        count = 0
        # Serialize out the task defs, recording the start and end bytes of
        # each taskDef.
        inverseLookup = self._datasetDict.inverse
        taskDef: TaskDef
        # Sort by task label to ensure serialization happens in the same
        # order.
        for taskDef in self.taskGraph:
            # Compressing has very little impact on save or load time, but
            # a large impact on on-disk size, so it is worth doing.
            taskDescription: dict[str, Any] = {}
            # Save the fully qualified name.
            taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
            # Save the config as a text stream that will be un-persisted on
            # the other end.
            stream = io.StringIO()
            taskDef.config.saveToStream(stream)
            taskDescription["config"] = stream.getvalue()
            taskDescription["label"] = taskDef.label
            if (refs := self._initInputRefs.get(taskDef)) is not None:
                taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
            if (refs := self._initOutputRefs.get(taskDef)) is not None:
                taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

            inputs = []
            outputs = []

            # Determine the connections between all of the tasks and save
            # that in the header as a list of connections and edges for each
            # task. This will help in un-persisting, and possibly in a
            # "quick view" method that does not require everything to be
            # un-persisted.
            #
            # Typing returns can't be parameter dependent
            for connection in inverseLookup[taskDef]:  # type: ignore
                consumers = self._datasetDict.getConsumers(connection)
                producer = self._datasetDict.getProducer(connection)
                if taskDef in consumers:
                    # This checks if the task consumes the connection
                    # directly from the datastore or if it is produced by
                    # another task.
                    producerLabel = producer.label if producer is not None else "datastore"
                    inputs.append((producerLabel, connection))
                elif taskDef not in consumers and producer is taskDef:
                    # If there are no consumers for this task's produced
                    # connection, the output will be said to be the
                    # datastore, in which case the for loop will be a
                    # zero-length loop.
                    if not consumers:
                        outputs.append(("datastore", connection))
                    for td in consumers:
                        outputs.append((td.label, connection))

            # Dump to a json string, encode that string to bytes, and then
            # compress those bytes.
            dump = lzma.compress(json.dumps(taskDescription).encode())
            # Record the sizing and relation information.
            taskDefMap[taskDef.label] = {
                "bytes": (count, count + len(dump)),
                "inputs": inputs,
                "outputs": outputs,
            }
            count += len(dump)
            jsonData.append(dump)

        headerData["TaskDefs"] = taskDefMap

        # Serialize the nodes, recording the start and end bytes of each
        # node.
        dimAccumulator = DimensionRecordsAccumulator()
        for node in self:
            # Compressing has very little impact on save or load time, but
            # a large impact on on-disk size, so it is worth doing.
            simpleNode = node.to_simple(accumulator=dimAccumulator)

            dump = lzma.compress(simpleNode.json().encode())
            jsonData.append(dump)
            nodeMap.append(
                (
                    str(node.nodeId),
                    {
                        "bytes": (count, count + len(dump)),
                        "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                        "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                    },
                )
            )
            count += len(dump)

        headerData["DimensionRecords"] = {
            key: value.dict() for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
        }

        # Need to serialize this as a series of key, value tuples because of
        # a limitation in json: it can't accept anything but strings as
        # mapping keys.
        headerData["Nodes"] = nodeMap

        if self._globalInitOutputRefs:
            headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

        if self._registryDatasetTypes:
            headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

        # Dump the headerData to json.
        header_encode = lzma.compress(json.dumps(headerData).encode())

        # Record the save version followed by the header length in the
        # version-specific format.
        save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

        fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
        map_lengths = struct.pack(fmt_string, len(header_encode))

        # Write each component of the save out in a deterministic order.
        buffer = bytearray()
        buffer.extend(MAGIC_BYTES)
        buffer.extend(save_bytes)
        buffer.extend(map_lengths)
        buffer.extend(header_encode)
        # Iterate over the length of jsonData, and for each element pop the
        # leftmost element off the deque and write it out. This is to save
        # memory: as the data is added to the buffer object, it is removed
        # from the container.
        #
        # Only this section needs to worry about memory pressure, because
        # everything else written to the buffer prior to this data is
        # only on the order of kilobytes to low numbers of megabytes.
        while jsonData:
            buffer.extend(jsonData.popleft())
        if returnHeader:
            return buffer, headerData
        else:
            return buffer
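Putting the pieces above together, the on-disk layout of a save is: the magic bytes, a 2-byte version, a header-length field, the lzma-compressed JSON header, and then the concatenated lzma-compressed task and node payloads addressed by the byte ranges stored in the header. A hedged sketch of reading the header back, assuming the version-3 length field is a single big-endian unsigned long long like version 2's ">Q" format; the production logic lives in `_versionDeserializers`:

import json
import lzma
import struct

def read_header(path: str) -> dict:
    # Minimal sketch, not the production loader in _versionDeserializers.
    with open(path, "rb") as fd:
        assert fd.read(len(MAGIC_BYTES)) == MAGIC_BYTES
        (version,) = struct.unpack(STRUCT_FMT_BASE, fd.read(2))
        # Assumption: the header length is one big-endian unsigned long
        # long, mirroring version 2's ">Q" format above.
        (header_len,) = struct.unpack(">Q", fd.read(8))
        return json.loads(lzma.decompress(fd.read(header_len)))

# header = read_header("example.qgraph")  # hypothetical file name
# header["TaskDefs"]["someLabel"]["bytes"] then gives (start, stop) offsets
# into the payload region that follows the header.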

    @classmethod
    def load(
        cls,
        file: BinaryIO,
        universe: DimensionUniverse | None = None,
        nodes: Iterable[uuid.UUID] | None = None,
        graphID: BuildId | None = None,
        minimumVersion: int = 3,
    ) -> QuantumGraph:
        """Read a `QuantumGraph` from a file that was made by `save`.

        Parameters
        ----------
        file : `io.IO` of bytes
            File with serialized graph data, open in binary mode.
        universe : `~lsst.daf.butler.DimensionUniverse`, optional
            `~lsst.daf.butler.DimensionUniverse` instance, not used by the
            method itself but needed to ensure that registry data structures
            are initialized. If `None` it is loaded from the `QuantumGraph`
            saved structure. If supplied, the
            `~lsst.daf.butler.DimensionUniverse` from the loaded
            `QuantumGraph` will be validated against the supplied argument
            for compatibility.
        nodes : iterable of `uuid.UUID` or `None`
            UUIDs that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to None, in which case all
            nodes will be loaded.
        graphID : `str` or `None`
            If specified, this ID is verified against the loaded graph prior
            to loading any nodes. This defaults to None, in which case no
            validation is done.
        minimumVersion : `int`
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if the pickle contains an instance of a type other than
            `QuantumGraph`.
        ValueError
            Raised if one or more of the nodes requested is not in the
            `QuantumGraph`, or if the graphID parameter does not match the
            graph being loaded, or if the supplied uri does not point at a
            valid `QuantumGraph` save file.

        Notes
        -----
        Reading Quanta from pickle requires the existence of a singleton
        `~lsst.daf.butler.DimensionUniverse` which is usually instantiated
        during `~lsst.daf.butler.Registry` initialization. To make sure that
        a `~lsst.daf.butler.DimensionUniverse` exists this method accepts a
        dummy `~lsst.daf.butler.DimensionUniverse` argument.
        """
        # Try to see if the file handle contains pickle data; this will be
        # removed in the future.
        try:
            qgraph = pickle.load(file)
            warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
        except pickle.UnpicklingError:
            with LoadHelper(file, minimumVersion) as loader:
                qgraph = loader.load(universe, nodes, graphID)
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(f"QuantumGraph pickle file contains an unexpected object type: {type(qgraph)}")
        return qgraph

    def iterTaskGraph(self) -> Generator[TaskDef, None, None]:
        """Iterate over the `taskGraph` attribute in topological order.

        Yields
        ------
        taskDef : `TaskDef`
            `TaskDef` objects in topological order.
        """
        yield from nx.topological_sort(self.taskGraph)

    def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None:
        """Change the output run and dataset ID for each output dataset.

        Parameters
        ----------
        run : `str`
            New output run name.
        metadata_key : `str` or `None`
            Specifies the metadata key corresponding to the output run name
            to update with the new run name. If `None`, or if the metadata
            is missing, it is not updated. If the metadata is present but
            the key is missing, it will be added.
        update_graph_id : `bool`, optional
            If `True` then also update the graph ID with a new unique value.
        """

        def _update_refs_in_place(refs: list[DatasetRef], run: str) -> None:
            """Update a list of `~lsst.daf.butler.DatasetRef` with a new run
            and dataset IDs.
            """
            for ref in refs:
                # Hack the run to be replaced explicitly.
                object.__setattr__(ref, "run", run)

        # Loop through all outputs and update their datasets.
        for node in self._connectedQuanta:
            for refs in node.quantum.outputs.values():
                _update_refs_in_place(refs, run)

        for refs in self._initOutputRefs.values():
            _update_refs_in_place(refs, run)

        _update_refs_in_place(self._globalInitOutputRefs, run)

        # Update all intermediates from their matching outputs.
        for node in self._connectedQuanta:
            for refs in node.quantum.inputs.values():
                _update_refs_in_place(refs, run)

        for refs in self._initInputRefs.values():
            _update_refs_in_place(refs, run)

        if update_graph_id:
            self._buildId = BuildId(f"{time.time()}-{os.getpid()}")

        # Update metadata if present.
        if self._metadata is not None and metadata_key is not None:
            metadata = dict(self._metadata)
            metadata[metadata_key] = run
            self._metadata = metadata
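The ``object.__setattr__`` call above is the standard escape hatch for mutating an attribute on an instance whose class blocks normal assignment, such as a frozen dataclass. A hedged toy demonstration of the mechanism; ``FrozenRef`` is a made-up stand-in, not the real DatasetRef:

from dataclasses import dataclass

@dataclass(frozen=True)
class FrozenRef:
    # Stand-in for an immutable ref-like object; not the real DatasetRef.
    name: str
    run: str

ref = FrozenRef(name="calexp", run="old_run")
# Plain `ref.run = "new_run"` would raise dataclasses.FrozenInstanceError.
object.__setattr__(ref, "run", "new_run")  # bypasses the frozen guard
print(ref.run)  # 'new_run'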

    @property
    def graphID(self) -> BuildId:
        """The ID generated by the graph at construction time (`str`)."""
        return self._buildId

    @property
    def universe(self) -> DimensionUniverse:
        """Dimension universe associated with this graph
        (`~lsst.daf.butler.DimensionUniverse`).
        """
        return self._universe

    def __iter__(self) -> Generator[QuantumNode, None, None]:
        yield from nx.topological_sort(self._connectedQuanta)

    def __len__(self) -> int:
        return self._count

    def __contains__(self, node: QuantumNode) -> bool:
        return self._connectedQuanta.has_node(node)

    def __getstate__(self) -> dict:
        """Store a compact form of the graph as a list of graph nodes, and a
        tuple of task labels and task configs. The full graph can be
        reconstructed with this information, and it preserves the ordering
        of the graph nodes.
        """
        universe: DimensionUniverse | None = None
        for node in self:
            dId = node.quantum.dataId
            if dId is None:
                continue
            universe = dId.graph.universe
        return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe}

    def __setstate__(self, state: dict) -> None:
        """Reconstruct the state of the graph from the information persisted
        in getstate.
        """
        buffer = io.BytesIO(state["reduced"])
        with LoadHelper(buffer, minimumVersion=3) as loader:
            qgraph = loader.load(state["universe"], graphID=state["graphId"])

        self._metadata = qgraph._metadata
        self._buildId = qgraph._buildId
        self._datasetDict = qgraph._datasetDict
        self._nodeIdMap = qgraph._nodeIdMap
        self._count = len(qgraph)
        self._taskToQuantumNode = qgraph._taskToQuantumNode
        self._taskGraph = qgraph._taskGraph
        self._connectedQuanta = qgraph._connectedQuanta
        self._initInputRefs = qgraph._initInputRefs
        self._initOutputRefs = qgraph._initOutputRefs

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, QuantumGraph):
            return False
        if len(self) != len(other):
            return False
        for node in self:
            if node not in other:
                return False
            if self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node):
                return False
            if self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node):
                return False
        if set(self.allDatasetTypes) != set(other.allDatasetTypes):
            return False
        return set(self.taskGraph) == set(other.taskGraph)