Coverage for python/lsst/pipe/base/graph/graph.py: 19%

413 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-23 10:31 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QuantumGraph", "IncompatibleGraphError") 

24 

25import io 

26import json 

27import lzma 

28import os 

29import struct 

30import time 

31import uuid 

32from collections import defaultdict, deque 

33from collections.abc import Generator, Iterable, Iterator, Mapping, MutableMapping 

34from itertools import chain 

35from types import MappingProxyType 

36from typing import Any, BinaryIO, TypeVar 

37 

38import networkx as nx 

39from lsst.daf.butler import ( 

40 DatasetId, 

41 DatasetRef, 

42 DatasetType, 

43 DimensionRecordsAccumulator, 

44 DimensionUniverse, 

45 Quantum, 

46) 

47from lsst.resources import ResourcePath, ResourcePathExpression 

48from lsst.utils.introspection import get_full_type_name 

49from networkx.drawing.nx_agraph import write_dot 

50 

51from ..connections import iterConnections 

52from ..pipeline import TaskDef 

53from ._implDetails import DatasetTypeName, _DatasetTracker, _pruner 

54from ._loadHelpers import LoadHelper 

55from ._versionDeserializers import DESERIALIZER_MAP 

56from .quantumNode import BuildId, QuantumNode 

57 

# Type variable for methods that return a new graph of the same concrete
# (possibly subclassed) type as ``self`` (e.g. ``subset``).
_T = TypeVar("_T", bound="QuantumGraph")

# modify this constant any time the on disk representation of the save file
# changes, and update the load helpers to behave properly for each version.
SAVE_VERSION = 3

# Strings used to describe the format for the preamble bytes in a file save
# The base is a big endian encoded unsigned short that is used to hold the
# file format version. This allows reading version bytes and determine which
# loading code should be used for the rest of the file
STRUCT_FMT_BASE = ">H"
#
# Version 1
# This marks a big endian encoded format with an unsigned short, an unsigned
# long long, and an unsigned long long in the byte stream
# Version 2
# A big endian encoded format with an unsigned long long byte stream used to
# indicate the total length of the entire header.
STRUCT_FMT_STRING = {1: ">QQ", 2: ">Q"}

# magic bytes that help determine this is a graph save
MAGIC_BYTES = b"qgraph4\xf6\xe8\xa9"

80 

81 

class IncompatibleGraphError(Exception):
    """Raised when a lookup by ``NodeId`` cannot be performed because the
    node identifier is incompatible with this graph.
    """

    pass

88 

89 

class QuantumGraph:
    """QuantumGraph is a directed acyclic graph of `QuantumNode` objects

    This data structure represents a concrete workflow generated from a
    `Pipeline`.

    Parameters
    ----------
    quanta : `~collections.abc.Mapping` [ `TaskDef`, \
            `set` [ `~lsst.daf.butler.Quantum` ] ]
        This maps tasks (and their configs) to the sets of data they are to
        process.
    metadata : Optional `~collections.abc.Mapping` of `str` to primitives
        This is an optional parameter of extra data to carry with the graph.
        Entries in this mapping should be able to be serialized in JSON.
    pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Set of dataset refs to exclude from a graph.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        The dimensions in which quanta can be defined. Need only be provided if
        no quanta have data IDs.
    initInputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitInput dataset refs. Dataset refs can be either
        resolved or non-resolved. Presently the same dataset refs are included
        in each `~lsst.daf.butler.Quantum` for the same task.
    initOutputs : `~collections.abc.Mapping`, optional
        Maps tasks to their InitOutput dataset refs. Dataset refs can be either
        resolved or non-resolved. For intermediate resolved refs their dataset
        ID must match ``initInputs`` and Quantum ``initInputs``.
    globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional
        Dataset refs for some global objects produced by pipeline. These
        objects include task configurations and package versions. Typically
        they have an empty DataId, but there is no real restriction on what
        can appear here.
    registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \
            optional
        Dataset types which are used by this graph, their definitions must
        match registry. If registry does not define dataset type yet, then
        it should match one that will be created later.

    Raises
    ------
    ValueError
        Raised if the graph is pruned such that some tasks no longer have nodes
        associated with them.
    """

    def __init__(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        # All construction is delegated to _buildGraphs so that alternate
        # construction paths (``subset``, ``pruneGraphFromRefs``) can reuse
        # the same code with extra private keyword arguments.
        self._buildGraphs(
            quanta,
            metadata=metadata,
            pruneRefs=pruneRefs,
            universe=universe,
            initInputs=initInputs,
            initOutputs=initOutputs,
            globalInitOutputs=globalInitOutputs,
            registryDatasetTypes=registryDatasetTypes,
        )

157 

    def _buildGraphs(
        self,
        quanta: Mapping[TaskDef, set[Quantum]],
        *,
        _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None,
        _buildId: BuildId | None = None,
        metadata: Mapping[str, Any] | None = None,
        pruneRefs: Iterable[DatasetRef] | None = None,
        universe: DimensionUniverse | None = None,
        initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None,
        globalInitOutputs: Iterable[DatasetRef] | None = None,
        registryDatasetTypes: Iterable[DatasetType] | None = None,
    ) -> None:
        """Build the graph that is used to store the relation between tasks,
        and the graph that holds the relations between quanta.

        Private keyword arguments (leading underscore) are used by the
        alternate construction paths (``subset``, ``pruneGraphFromRefs``) to
        preserve node identity and the original build id.
        """
        self._metadata = metadata
        # time+pid is used as a (sufficiently) unique identifier for this
        # particular build of the graph.
        self._buildId = _buildId if _buildId is not None else BuildId(f"{time.time()}-{os.getpid()}")
        # Data structures used to identify relations between components;
        # DatasetTypeName -> TaskDef for task,
        # and DatasetRef -> QuantumNode for the quanta
        self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True)
        self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]()

        self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {}
        self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set)
        for taskDef, quantumSet in quanta.items():
            connections = taskDef.connections

            # For each type of connection in the task, add a key to the
            # `_DatasetTracker` for the connections name, with a value of
            # the TaskDef in the appropriate field
            for inpt in iterConnections(connections, ("inputs", "prerequisiteInputs", "initInputs")):
                # Have to handle components in inputs; only the parent
                # dataset type name (before the first ".") is tracked.
                dataset_name, _, _ = inpt.name.partition(".")
                self._datasetDict.addConsumer(DatasetTypeName(dataset_name), taskDef)

            for output in iterConnections(connections, ("outputs",)):
                # Have to handle possible components in outputs.
                dataset_name, _, _ = output.name.partition(".")
                self._datasetDict.addProducer(DatasetTypeName(dataset_name), taskDef)

            # For each `Quantum` in the set of all `Quantum` for this task,
            # add a key to the `_DatasetTracker` that is a `DatasetRef` for one
            # of the individual datasets inside the `Quantum`, with a value of
            # a newly created QuantumNode to the appropriate input/output
            # field.
            for quantum in quantumSet:
                if quantum.dataId is not None:
                    # Infer the universe from the first data ID seen, then
                    # require all subsequent data IDs to agree with it.
                    if universe is None:
                        universe = quantum.dataId.universe
                    elif universe != quantum.dataId.universe:
                        raise RuntimeError(
                            "Mismatched dimension universes in QuantumGraph construction: "
                            f"{universe} != {quantum.dataId.universe}. "
                        )

                # When a node-id mapping is supplied (subset/prune paths)
                # every quantum must already have an id; otherwise mint a
                # fresh uuid4.
                if _quantumToNodeId:
                    if (nodeId := _quantumToNodeId.get(quantum)) is None:
                        raise ValueError(
                            "If _quantuMToNodeNumber is not None, all quanta must have an "
                            "associated value in the mapping"
                        )
                else:
                    nodeId = uuid.uuid4()

                inits = quantum.initInputs.values()
                inputs = quantum.inputs.values()
                value = QuantumNode(quantum, taskDef, nodeId)
                self._taskToQuantumNode[taskDef].add(value)
                self._nodeIdMap[nodeId] = value

                for dsRef in chain(inits, inputs):
                    # unfortunately, `Quantum` allows inits to be individual
                    # `DatasetRef`s or an Iterable of such, so there must
                    # be an instance check here
                    if isinstance(dsRef, Iterable):
                        for sub in dsRef:
                            # Component refs are tracked via their parent
                            # (composite) ref.
                            if sub.isComponent():
                                sub = sub.makeCompositeRef()
                            self._datasetRefDict.addConsumer(sub, value)
                    else:
                        assert isinstance(dsRef, DatasetRef)
                        if dsRef.isComponent():
                            dsRef = dsRef.makeCompositeRef()
                        self._datasetRefDict.addConsumer(dsRef, value)
                for dsRef in chain.from_iterable(quantum.outputs.values()):
                    self._datasetRefDict.addProducer(dsRef, value)

        if pruneRefs is not None:
            # track what refs were pruned and prune the graph
            # NOTE(review): pruneRefs is iterated again below; this assumes a
            # re-iterable collection, not a one-shot generator — confirm.
            prunes: set[QuantumNode] = set()
            _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes)

            # recreate the taskToQuantumNode dict removing nodes that have been
            # pruned. Keep track of task defs that now have no QuantumNodes
            emptyTasks: set[str] = set()
            newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set)
            # accumulate all types
            types_ = set()
            # tracker for any pruneRefs that have caused tasks to have no nodes
            # This helps the user find out what caused the issues seen.
            culprits = set()
            # Find all the types from the refs to prune
            for r in pruneRefs:
                types_.add(r.datasetType)

            # For each of the tasks, and their associated nodes, remove any
            # any nodes that were pruned. If there are no nodes associated
            # with a task, record that task, and find out if that was due to
            # a type from an input ref to prune.
            for td, taskNodes in self._taskToQuantumNode.items():
                diff = taskNodes.difference(prunes)
                if len(diff) == 0:
                    if len(taskNodes) != 0:
                        # Inspect one surviving-before-prune node: if all of
                        # its inputs of a pruned type were pruned, that type
                        # is a culprit for the task going empty.
                        tp: DatasetType
                        for tp in types_:
                            if (tmpRefs := next(iter(taskNodes)).quantum.inputs.get(tp)) and not set(
                                tmpRefs
                            ).difference(pruneRefs):
                                culprits.add(tp.name)
                    emptyTasks.add(td.label)
                newTaskToQuantumNode[td] = diff

            # update the internal dict
            self._taskToQuantumNode = newTaskToQuantumNode

            if emptyTasks:
                raise ValueError(
                    f"{', '.join(emptyTasks)} task(s) have no nodes associated with them "
                    f"after graph pruning; {', '.join(culprits)} caused over-pruning"
                )

        # Dimension universe: must have been supplied or inferred from at
        # least one quantum data ID above.
        if universe is None:
            raise RuntimeError(
                "Dimension universe or at least one quantum with a data ID "
                "must be provided when constructing a QuantumGraph."
            )
        self._universe = universe

        # Graph of quanta relations
        self._connectedQuanta = self._datasetRefDict.makeNetworkXGraph()
        self._count = len(self._connectedQuanta)

        # Graph of task relations, used in various methods
        self._taskGraph = self._datasetDict.makeNetworkXGraph()

        # convert default dict into a regular to prevent accidental key
        # insertion
        self._taskToQuantumNode = dict(self._taskToQuantumNode.items())

        self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {}
        self._globalInitOutputRefs: list[DatasetRef] = []
        self._registryDatasetTypes: list[DatasetType] = []
        if initInputs is not None:
            self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()}
        if initOutputs is not None:
            self._initOutputRefs = {taskDef: list(refs) for taskDef, refs in initOutputs.items()}
        if globalInitOutputs is not None:
            self._globalInitOutputRefs = list(globalInitOutputs)
        if registryDatasetTypes is not None:
            self._registryDatasetTypes = list(registryDatasetTypes)

323 

324 @property 

325 def taskGraph(self) -> nx.DiGraph: 

326 """A graph representing the relations between the tasks inside 

327 the quantum graph (`networkx.DiGraph`). 

328 """ 

329 return self._taskGraph 

330 

331 @property 

332 def graph(self) -> nx.DiGraph: 

333 """A graph representing the relations between all the `QuantumNode` 

334 objects (`networkx.DiGraph`). 

335 

336 The graph should usually be iterated over, or passed to methods of this 

337 class, but sometimes direct access to the ``networkx`` object may be 

338 helpful. 

339 """ 

340 return self._connectedQuanta 

341 

342 @property 

343 def inputQuanta(self) -> Iterable[QuantumNode]: 

344 """The nodes that are inputs to the graph (iterable [`QuantumNode`]). 

345 

346 These are the nodes that do not depend on any other nodes in the 

347 graph. 

348 """ 

349 return (q for q, n in self._connectedQuanta.in_degree if n == 0) 

350 

351 @property 

352 def outputQuanta(self) -> Iterable[QuantumNode]: 

353 """The nodes that are outputs of the graph (iterable [`QuantumNode`]). 

354 

355 These are the nodes that have no nodes that depend on them in the 

356 graph. 

357 """ 

358 return [q for q, n in self._connectedQuanta.out_degree if n == 0] 

359 

360 @property 

361 def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]: 

362 """All the data set type names that are present in the graph 

363 (`tuple` [`str`]). 

364 

365 These types do not include global init-outputs. 

366 """ 

367 return tuple(self._datasetDict.keys()) 

368 

369 @property 

370 def isConnected(self) -> bool: 

371 """Whether all of the nodes in the graph are connected, ignoring 

372 directionality of connections (`bool`). 

373 """ 

374 return nx.is_weakly_connected(self._connectedQuanta) 

375 

376 def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: 

377 r"""Return a graph pruned of input `~lsst.daf.butler.DatasetRef`\ s 

378 and nodes which depend on them. 

379 

380 Parameters 

381 ---------- 

382 refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef` 

383 Refs which should be removed from resulting graph 

384 

385 Returns 

386 ------- 

387 graph : `QuantumGraph` 

388 A graph that has been pruned of specified refs and the nodes that 

389 depend on them. 

390 """ 

391 newInst = object.__new__(type(self)) 

392 quantumMap = defaultdict(set) 

393 for node in self: 

394 quantumMap[node.taskDef].add(node.quantum) 

395 

396 # convert to standard dict to prevent accidental key insertion 

397 quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) 

398 

399 # This should not change set of tasks in a graph, so we can keep the 

400 # same registryDatasetTypes as in the original graph. 

401 # TODO: Do we need to copy initInputs/initOutputs? 

402 newInst._buildGraphs( 

403 quantumDict, 

404 _quantumToNodeId={n.quantum: n.nodeId for n in self}, 

405 metadata=self._metadata, 

406 pruneRefs=refs, 

407 universe=self._universe, 

408 globalInitOutputs=self._globalInitOutputRefs, 

409 registryDatasetTypes=self._registryDatasetTypes, 

410 ) 

411 return newInst 

412 

413 def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: 

414 """Lookup a `QuantumNode` from an id associated with the node. 

415 

416 Parameters 

417 ---------- 

418 nodeId : `NodeId` 

419 The number associated with a node 

420 

421 Returns 

422 ------- 

423 node : `QuantumNode` 

424 The node corresponding with input number 

425 

426 Raises 

427 ------ 

428 KeyError 

429 Raised if the requested nodeId is not in the graph. 

430 """ 

431 return self._nodeIdMap[nodeId] 

432 

433 def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]: 

434 """Return all the `~lsst.daf.butler.Quantum` associated with a 

435 `TaskDef`. 

436 

437 Parameters 

438 ---------- 

439 taskDef : `TaskDef` 

440 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

441 queried. 

442 

443 Returns 

444 ------- 

445 quanta : `frozenset` of `~lsst.daf.butler.Quantum` 

446 The `set` of `~lsst.daf.butler.Quantum` that is associated with the 

447 specified `TaskDef`. 

448 """ 

449 return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) 

450 

451 def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: 

452 """Return the number of `~lsst.daf.butler.Quantum` associated with 

453 a `TaskDef`. 

454 

455 Parameters 

456 ---------- 

457 taskDef : `TaskDef` 

458 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

459 queried. 

460 

461 Returns 

462 ------- 

463 count : `int` 

464 The number of `~lsst.daf.butler.Quantum` that are associated with 

465 the specified `TaskDef`. 

466 """ 

467 return len(self._taskToQuantumNode.get(taskDef, ())) 

468 

469 def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]: 

470 r"""Return all the `QuantumNode`\s associated with a `TaskDef`. 

471 

472 Parameters 

473 ---------- 

474 taskDef : `TaskDef` 

475 The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be 

476 queried. 

477 

478 Returns 

479 ------- 

480 nodes : `frozenset` [ `QuantumNode` ] 

481 A `frozenset` of `QuantumNode` that is associated with the 

482 specified `TaskDef`. 

483 """ 

484 return frozenset(self._taskToQuantumNode[taskDef]) 

485 

486 def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

487 """Find all tasks that have the specified dataset type name as an 

488 input. 

489 

490 Parameters 

491 ---------- 

492 datasetTypeName : `str` 

493 A string representing the name of a dataset type to be queried, 

494 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

495 `str` for type safety in static type checking. 

496 

497 Returns 

498 ------- 

499 tasks : iterable of `TaskDef` 

500 `TaskDef` objects that have the specified `DatasetTypeName` as an 

501 input, list will be empty if no tasks use specified 

502 `DatasetTypeName` as an input. 

503 

504 Raises 

505 ------ 

506 KeyError 

507 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

508 """ 

509 return (c for c in self._datasetDict.getConsumers(datasetTypeName)) 

510 

511 def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None: 

512 """Find all tasks that have the specified dataset type name as an 

513 output. 

514 

515 Parameters 

516 ---------- 

517 datasetTypeName : `str` 

518 A string representing the name of a dataset type to be queried, 

519 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

520 `str` for type safety in static type checking. 

521 

522 Returns 

523 ------- 

524 result : `TaskDef` or `None` 

525 `TaskDef` that outputs `DatasetTypeName` as an output or `None` if 

526 none of the tasks produce this `DatasetTypeName`. 

527 

528 Raises 

529 ------ 

530 KeyError 

531 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

532 """ 

533 return self._datasetDict.getProducer(datasetTypeName) 

534 

535 def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef]: 

536 """Find all tasks that are associated with the specified dataset type 

537 name. 

538 

539 Parameters 

540 ---------- 

541 datasetTypeName : `str` 

542 A string representing the name of a dataset type to be queried, 

543 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

544 `str` for type safety in static type checking. 

545 

546 Returns 

547 ------- 

548 result : iterable of `TaskDef` 

549 `TaskDef` objects that are associated with the specified 

550 `DatasetTypeName`. 

551 

552 Raises 

553 ------ 

554 KeyError 

555 Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. 

556 """ 

557 return self._datasetDict.getAll(datasetTypeName) 

558 

559 def findTaskDefByName(self, taskName: str) -> list[TaskDef]: 

560 """Determine which `TaskDef` objects in this graph are associated 

561 with a `str` representing a task name (looks at the ``taskName`` 

562 property of `TaskDef` objects). 

563 

564 Returns a list of `TaskDef` objects as a `PipelineTask` may appear 

565 multiple times in a graph with different labels. 

566 

567 Parameters 

568 ---------- 

569 taskName : `str` 

570 Name of a task to search for. 

571 

572 Returns 

573 ------- 

574 result : `list` of `TaskDef` 

575 List of the `TaskDef` objects that have the name specified. 

576 Multiple values are returned in the case that a task is used 

577 multiple times with different labels. 

578 """ 

579 results = [] 

580 for task in self._taskToQuantumNode: 

581 split = task.taskName.split(".") 

582 if split[-1] == taskName: 

583 results.append(task) 

584 return results 

585 

586 def findTaskDefByLabel(self, label: str) -> TaskDef | None: 

587 """Determine which `TaskDef` objects in this graph are associated 

588 with a `str` representing a tasks label. 

589 

590 Parameters 

591 ---------- 

592 taskName : `str` 

593 Name of a task to search for 

594 

595 Returns 

596 ------- 

597 result : `TaskDef` 

598 `TaskDef` objects that has the specified label. 

599 """ 

600 for task in self._taskToQuantumNode: 

601 if label == task.label: 

602 return task 

603 return None 

604 

605 def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]: 

606 r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified 

607 `DatasetTypeName`. 

608 

609 Parameters 

610 ---------- 

611 datasetTypeName : `str` 

612 The name of the dataset type to search for as a string, 

613 can also accept a `DatasetTypeName` which is a `~typing.NewType` of 

614 `str` for type safety in static type checking. 

615 

616 Returns 

617 ------- 

618 result : `set` of `QuantumNode` objects 

619 A `set` of `QuantumNode`\s that contain specified 

620 `DatasetTypeName`. 

621 

622 Raises 

623 ------ 

624 KeyError 

625 Raised if the `DatasetTypeName` is not part of the `QuantumGraph` 

626 

627 """ 

628 tasks = self._datasetDict.getAll(datasetTypeName) 

629 result: set[Quantum] = set() 

630 result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task)) 

631 return result 

632 

633 def checkQuantumInGraph(self, quantum: Quantum) -> bool: 

634 """Check if specified quantum appears in the graph as part of a node. 

635 

636 Parameters 

637 ---------- 

638 quantum : `lsst.daf.butler.Quantum` 

639 The quantum to search for. 

640 

641 Returns 

642 ------- 

643 in_graph : `bool` 

644 The result of searching for the quantum. 

645 """ 

646 return any(quantum == node.quantum for node in self) 

647 

648 def writeDotGraph(self, output: str | io.BufferedIOBase) -> None: 

649 """Write out the graph as a dot graph. 

650 

651 Parameters 

652 ---------- 

653 output : `str` or `io.BufferedIOBase` 

654 Either a filesystem path to write to, or a file handle object. 

655 """ 

656 write_dot(self._connectedQuanta, output) 

657 

    def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T:
        """Create a new graph object that contains the subset of the nodes
        specified as input. Node number is preserved.

        Parameters
        ----------
        nodes : `QuantumNode` or iterable of `QuantumNode`
            Nodes from which to create subset.

        Returns
        -------
        graph : instance of graph type
            An instance of the type from which the subset was created.
        """
        # Normalize the single-node call form to an iterable.
        if not isinstance(nodes, Iterable):
            nodes = (nodes,)
        quantumSubgraph = self._connectedQuanta.subgraph(nodes).nodes
        quantumMap = defaultdict(set)

        # Collect the quanta per task and every dataset type name touched by
        # the retained nodes (inputs, outputs and init-inputs).
        dataset_type_names: set[str] = set()
        node: QuantumNode
        for node in quantumSubgraph:
            quantumMap[node.taskDef].add(node.quantum)
            dataset_type_names.update(
                dstype.name
                for dstype in chain(
                    node.quantum.inputs.keys(), node.quantum.outputs.keys(), node.quantum.initInputs.keys()
                )
            )

        # May need to trim dataset types from registryDatasetTypes.
        # Init-outputs of retained tasks and the global init-outputs are kept.
        for taskDef in quantumMap:
            if refs := self.initOutputRefs(taskDef):
                dataset_type_names.update(ref.datasetType.name for ref in refs)
        dataset_type_names.update(ref.datasetType.name for ref in self._globalInitOutputRefs)
        registryDatasetTypes = [
            dstype for dstype in self._registryDatasetTypes if dstype.name in dataset_type_names
        ]

        # convert to standard dict to prevent accidental key insertion
        quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items())
        # Create an empty graph, and then populate it with custom mapping
        newInst = type(self)({}, universe=self._universe)
        # TODO: Do we need to copy initInputs/initOutputs?
        # Node ids and the build id of the original graph are preserved.
        newInst._buildGraphs(
            quantumDict,
            _quantumToNodeId={n.quantum: n.nodeId for n in nodes},
            _buildId=self._buildId,
            metadata=self._metadata,
            universe=self._universe,
            globalInitOutputs=self._globalInitOutputRefs,
            registryDatasetTypes=registryDatasetTypes,
        )
        return newInst

712 

713 def subsetToConnected(self: _T) -> tuple[_T, ...]: 

714 """Generate a list of subgraphs where each is connected. 

715 

716 Returns 

717 ------- 

718 result : `list` of `QuantumGraph` 

719 A list of graphs that are each connected. 

720 """ 

721 return tuple( 

722 self.subset(connectedSet) 

723 for connectedSet in nx.weakly_connected_components(self._connectedQuanta) 

724 ) 

725 

726 def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

727 """Return a set of `QuantumNode` that are direct inputs to a specified 

728 node. 

729 

730 Parameters 

731 ---------- 

732 node : `QuantumNode` 

733 The node of the graph for which inputs are to be determined. 

734 

735 Returns 

736 ------- 

737 inputs : `set` of `QuantumNode` 

738 All the nodes that are direct inputs to specified node. 

739 """ 

740 return set(self._connectedQuanta.predecessors(node)) 

741 

742 def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: 

743 """Return a set of `QuantumNode` that are direct outputs of a specified 

744 node. 

745 

746 Parameters 

747 ---------- 

748 node : `QuantumNode` 

749 The node of the graph for which outputs are to be determined. 

750 

751 Returns 

752 ------- 

753 outputs : `set` of `QuantumNode` 

754 All the nodes that are direct outputs to specified node. 

755 """ 

756 return set(self._connectedQuanta.successors(node)) 

757 

758 def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

759 """Return a graph of `QuantumNode` that are direct inputs and outputs 

760 of a specified node. 

761 

762 Parameters 

763 ---------- 

764 node : `QuantumNode` 

765 The node of the graph for which connected nodes are to be 

766 determined. 

767 

768 Returns 

769 ------- 

770 graph : graph of `QuantumNode` 

771 All the nodes that are directly connected to specified node. 

772 """ 

773 nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node)) 

774 nodes.add(node) 

775 return self.subset(nodes) 

776 

777 def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: 

778 """Return a graph of the specified node and all the ancestor nodes 

779 directly reachable by walking edges. 

780 

781 Parameters 

782 ---------- 

783 node : `QuantumNode` 

784 The node for which all ancestors are to be determined 

785 

786 Returns 

787 ------- 

788 ancestors : graph of `QuantumNode` 

789 Graph of node and all of its ancestors. 

790 """ 

791 predecessorNodes = nx.ancestors(self._connectedQuanta, node) 

792 predecessorNodes.add(node) 

793 return self.subset(predecessorNodes) 

794 

795 def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]: 

796 """Check a graph for the presense of cycles and returns the edges of 

797 any cycles found, or an empty list if there is no cycle. 

798 

799 Returns 

800 ------- 

801 result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ] 

802 A list of any graph edges that form a cycle, or an empty list if 

803 there is no cycle. Empty list to so support if graph.find_cycle() 

804 syntax as an empty list is falsy. 

805 """ 

806 try: 

807 return nx.find_cycle(self._connectedQuanta) 

808 except nx.NetworkXNoCycle: 

809 return [] 

810 

811 def saveUri(self, uri: ResourcePathExpression) -> None: 

812 """Save `QuantumGraph` to the specified URI. 

813 

814 Parameters 

815 ---------- 

816 uri : convertible to `~lsst.resources.ResourcePath` 

817 URI to where the graph should be saved. 

818 """ 

819 buffer = self._buildSaveObject() 

820 path = ResourcePath(uri) 

821 if path.getExtension() not in (".qgraph"): 

822 raise TypeError(f"Can currently only save a graph in qgraph format not {uri}") 

823 path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes 

824 

825 @property 

826 def metadata(self) -> MappingProxyType[str, Any] | None: 

827 """Extra data carried with the graph (mapping [`str`] or `None`). 

828 

829 The mapping is a dynamic view of this object's metadata. Values should 

830 be able to be serialized in JSON. 

831 """ 

832 if self._metadata is None: 

833 return None 

834 return MappingProxyType(self._metadata) 

835 

836 def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: 

837 """Return DatasetRefs for a given task InitInputs. 

838 

839 Parameters 

840 ---------- 

841 taskDef : `TaskDef` 

842 Task definition structure. 

843 

844 Returns 

845 ------- 

846 refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` 

847 DatasetRef for the task InitInput, can be `None`. This can return 

848 either resolved or non-resolved reference. 

849 """ 

850 return self._initInputRefs.get(taskDef) 

851 

def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None:
    """Return DatasetRefs for a given task InitOutputs.

    Parameters
    ----------
    taskDef : `TaskDef`
        Task definition structure.

    Returns
    -------
    refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None`
        DatasetRefs for the task InitOutput, can be `None`. This can return
        either resolved or non-resolved reference. Resolved reference will
        match Quantum's initInputs if this is an intermediate dataset type.
    """
    # ``None`` when no init-output refs were ever recorded for this task.
    refs = self._initOutputRefs.get(taskDef)
    return refs

868 

def globalInitOutputRefs(self) -> list[DatasetRef]:
    """Return DatasetRefs for global InitOutputs.

    Returns
    -------
    refs : `list` [ `~lsst.daf.butler.DatasetRef` ]
        The dataset references recorded for the graph-wide init outputs.
    """
    # Intentionally returns the stored list itself, not a copy, matching
    # the original accessor's behavior.
    refs = self._globalInitOutputRefs
    return refs

878 

def registryDatasetTypes(self) -> list[DatasetType]:
    """Return dataset types used by this graph; their definitions match
    dataset types from registry.

    Returns
    -------
    refs : `list` [ `~lsst.daf.butler.DatasetType` ]
        Dataset types for this graph.
    """
    # Intentionally returns the stored list itself, not a copy.
    dataset_types = self._registryDatasetTypes
    return dataset_types

889 

@classmethod
def loadUri(
    cls,
    uri: ResourcePathExpression,
    universe: DimensionUniverse | None = None,
    nodes: Iterable[uuid.UUID] | None = None,
    graphID: BuildId | None = None,
    minimumVersion: int = 3,
) -> QuantumGraph:
    """Read `QuantumGraph` from a URI.

    Parameters
    ----------
    uri : convertible to `~lsst.resources.ResourcePath`
        URI from where to load the graph.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        If `None` it is loaded from the `QuantumGraph` saved structure.
        If supplied, the `~lsst.daf.butler.DimensionUniverse` from the
        loaded `QuantumGraph` will be validated against the supplied
        argument for compatibility.
    nodes : iterable of `uuid.UUID` or `None`
        UUIDs that correspond to nodes in the graph. If specified, only
        these nodes will be loaded. Defaults to None, in which case all
        nodes will be loaded.
    graphID : `str` or `None`
        If specified this ID is verified against the loaded graph prior to
        loading any Nodes. This defaults to None in which case no
        validation is done.
    minimumVersion : `int`
        Minimum version of a save file to load. Set to -1 to load all
        versions. Older versions may need to be loaded, and re-saved
        to upgrade them to the latest format before they can be used in
        production.

    Returns
    -------
    graph : `QuantumGraph`
        Resulting QuantumGraph instance.

    Raises
    ------
    TypeError
        Raised if file contains instance of a type other than
        `QuantumGraph`.
    ValueError
        Raised if one or more of the nodes requested is not in the
        `QuantumGraph` or if graphID parameter does not match the graph
        being loaded or if the supplied uri does not point at a valid
        `QuantumGraph` save file.
    RuntimeError
        Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not
        compatible with the `~lsst.daf.butler.DimensionUniverse` saved in
        the graph.
    """
    uri = ResourcePath(uri)
    # Guard clause: reject anything that is not a .qgraph file up front.
    if uri.getExtension() != ".qgraph":
        raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}")
    with LoadHelper(uri, minimumVersion) as loader:
        qgraph = loader.load(universe, nodes, graphID)
    if not isinstance(qgraph, QuantumGraph):
        raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}")
    return qgraph

953 

@classmethod
def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None:
    """Read the header of a `QuantumGraph` pointed to by the uri parameter
    and return it as a string.

    Parameters
    ----------
    uri : convertible to `~lsst.resources.ResourcePath`
        The location of the `QuantumGraph` to load. If the argument is a
        string, it must correspond to a valid
        `~lsst.resources.ResourcePath` path.
    minimumVersion : `int`
        Minimum version of a save file to load. Set to -1 to load all
        versions. Older versions may need to be loaded, and re-saved
        to upgrade them to the latest format before they can be used in
        production.

    Returns
    -------
    header : `str` or `None`
        The header associated with the specified `QuantumGraph` if there
        is one, else `None`.

    Raises
    ------
    ValueError
        Raised if the extension of the file specified by uri is not a
        `QuantumGraph` extension.
    """
    uri = ResourcePath(uri)
    # Guard clause: only .qgraph files carry a readable header.
    if uri.getExtension() != ".qgraph":
        raise ValueError("Only know how to handle files saved as `.qgraph`")
    return LoadHelper(uri, minimumVersion).readHeader()

988 

def buildAndPrintHeader(self) -> None:
    """Build the header that a save of this object would use and print it
    to standard out as JSON.
    """
    # Second element of the returned pair is the header mapping.
    _, header_data = self._buildSaveObject(returnHeader=True)
    print(json.dumps(header_data))

995 

def save(self, file: BinaryIO) -> None:
    """Save QuantumGraph to a file.

    Parameters
    ----------
    file : `io.BufferedIOBase`
        File to write data open in binary mode.
    """
    serialized = self._buildSaveObject()
    file.write(serialized)  # type: ignore # Ignore because bytearray is safe to use in place of bytes

1006 

def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
    """Serialize this graph into the on-disk ``.qgraph`` byte layout.

    Parameters
    ----------
    returnHeader : `bool`, optional
        If `True`, also return the (uncompressed) header mapping that was
        embedded at the front of the byte stream.

    Returns
    -------
    buffer : `bytearray`
        The full serialized graph: magic bytes, save version, header
        length, lzma-compressed JSON header, then the lzma-compressed
        per-taskDef and per-node payloads in order.
    header : `dict`
        The header mapping; only returned when ``returnHeader`` is `True`.
    """
    # make some containers
    jsonData: deque[bytes] = deque()
    # node map is a list because json does not accept mapping keys that
    # are not strings, so we store a list of key, value pairs that will
    # be converted to a mapping on load
    nodeMap = []
    taskDefMap = {}
    headerData: dict[str, Any] = {}

    # Store the QuantumGraph BuildId, this will allow validating BuildIds
    # at load time, prior to loading any QuantumNodes. Name chosen for
    # unlikely conflicts.
    headerData["GraphBuildID"] = self.graphID
    headerData["Metadata"] = self._metadata

    # Store the universe this graph was created with
    universeConfig = self._universe.dimensionConfig
    headerData["universe"] = universeConfig.toDict()

    # counter for the number of bytes processed thus far; each payload's
    # (start, end) byte offsets are recorded in the header so a loader
    # can seek to individual pieces.
    count = 0
    # serialize out the task Defs recording the start and end bytes of each
    # taskDef
    inverseLookup = self._datasetDict.inverse
    taskDef: TaskDef
    # NOTE(review): an earlier comment claimed serialization happens
    # sorted by task label, but ``self.taskGraph`` is iterated directly
    # here — confirm its iteration order is deterministic.
    for taskDef in self.taskGraph:
        # compressing has very little impact on saving or load time, but
        # a large impact on the on-disk size, so it is worth doing
        taskDescription: dict[str, Any] = {}
        # save the fully qualified name.
        taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
        # save the config as a text stream that will be un-persisted on the
        # other end
        stream = io.StringIO()
        taskDef.config.saveToStream(stream)
        taskDescription["config"] = stream.getvalue()
        taskDescription["label"] = taskDef.label
        # Init input/output refs are optional; only persist them if set.
        if (refs := self._initInputRefs.get(taskDef)) is not None:
            taskDescription["initInputRefs"] = [ref.to_json() for ref in refs]
        if (refs := self._initOutputRefs.get(taskDef)) is not None:
            taskDescription["initOutputRefs"] = [ref.to_json() for ref in refs]

        inputs = []
        outputs = []

        # Determine the connection between all of tasks and save that in
        # the header as a list of connections and edges in each task
        # this will help in un-persisting, and possibly in a "quick view"
        # method that does not require everything to be un-persisted
        #
        # Typing returns can't be parameter dependent
        for connection in inverseLookup[taskDef]:  # type: ignore
            consumers = self._datasetDict.getConsumers(connection)
            producer = self._datasetDict.getProducer(connection)
            if taskDef in consumers:
                # This checks if the task consumes the connection directly
                # from the datastore or it is produced by another task
                producerLabel = producer.label if producer is not None else "datastore"
                inputs.append((producerLabel, connection))
            elif taskDef not in consumers and producer is taskDef:
                # If there are no consumers for this tasks produced
                # connection, the output will be said to be the datastore
                # in which case the for loop will be a zero length loop
                if not consumers:
                    outputs.append(("datastore", connection))
                for td in consumers:
                    outputs.append((td.label, connection))

        # dump to json string, and encode that string to bytes and then
        # compress those bytes
        dump = lzma.compress(json.dumps(taskDescription).encode())
        # record the sizing and relation information
        taskDefMap[taskDef.label] = {
            "bytes": (count, count + len(dump)),
            "inputs": inputs,
            "outputs": outputs,
        }
        count += len(dump)
        jsonData.append(dump)

    headerData["TaskDefs"] = taskDefMap

    # serialize the nodes, recording the start and end bytes of each node
    dimAccumulator = DimensionRecordsAccumulator()
    for node in self:
        # compressing has very little impact on saving or load time, but
        # a large impact on the on-disk size, so it is worth doing
        simpleNode = node.to_simple(accumulator=dimAccumulator)

        dump = lzma.compress(simpleNode.json().encode())
        jsonData.append(dump)
        nodeMap.append(
            (
                str(node.nodeId),
                {
                    "bytes": (count, count + len(dump)),
                    "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                    "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                },
            )
        )
        count += len(dump)

    # Dimension records referenced by the nodes are deduplicated by the
    # accumulator and stored once in the header.
    headerData["DimensionRecords"] = {
        key: value.model_dump()
        for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
    }

    # need to serialize this as a series of key,value tuples because of
    # a limitation on how json cant do anything but strings as keys
    headerData["Nodes"] = nodeMap

    if self._globalInitOutputRefs:
        headerData["GlobalInitOutputRefs"] = [ref.to_json() for ref in self._globalInitOutputRefs]

    if self._registryDatasetTypes:
        headerData["RegistryDatasetTypes"] = [dstype.to_json() for dstype in self._registryDatasetTypes]

    # dump the headerData to json
    header_encode = lzma.compress(json.dumps(headerData).encode())

    # record the sizes as 2 unsigned long long numbers for a total of 16
    # bytes
    save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

    fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
    map_lengths = struct.pack(fmt_string, len(header_encode))

    # write each component of the save out in a deterministic order
    buffer = bytearray()
    buffer.extend(MAGIC_BYTES)
    buffer.extend(save_bytes)
    buffer.extend(map_lengths)
    buffer.extend(header_encode)
    # Iterate over the length of jsonData, and for each element pop the
    # leftmost element off the deque and write it out. This is to save
    # memory, as the memory is added to the buffer object, it is removed
    # from the container.
    #
    # Only this section needs to worry about memory pressure because
    # everything else written to the buffer prior to this data is
    # only on the order of kilobytes to low numbers of megabytes.
    while jsonData:
        buffer.extend(jsonData.popleft())
    if returnHeader:
        return buffer, headerData
    else:
        return buffer

1157 

@classmethod
def load(
    cls,
    file: BinaryIO,
    universe: DimensionUniverse | None = None,
    nodes: Iterable[uuid.UUID] | None = None,
    graphID: BuildId | None = None,
    minimumVersion: int = 3,
) -> QuantumGraph:
    """Read `QuantumGraph` from a file that was made by `save`.

    Parameters
    ----------
    file : `io.IO` of bytes
        File with data open in binary mode.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        If `None` it is loaded from the `QuantumGraph` saved structure.
        If supplied, the `~lsst.daf.butler.DimensionUniverse` from the
        loaded `QuantumGraph` will be validated against the supplied
        argument for compatibility.
    nodes : iterable of `uuid.UUID` or `None`
        UUIDs that correspond to nodes in the graph. If specified, only
        these nodes will be loaded. Defaults to None, in which case all
        nodes will be loaded.
    graphID : `str` or `None`
        If specified this ID is verified against the loaded graph prior to
        loading any Nodes. This defaults to None in which case no
        validation is done.
    minimumVersion : `int`
        Minimum version of a save file to load. Set to -1 to load all
        versions. Older versions may need to be loaded, and re-saved
        to upgrade them to the latest format before they can be used in
        production.

    Returns
    -------
    graph : `QuantumGraph`
        Resulting QuantumGraph instance.

    Raises
    ------
    TypeError
        Raised if data contains instance of a type other than
        `QuantumGraph`.
    ValueError
        Raised if one or more of the nodes requested is not in the
        `QuantumGraph` or if graphID parameter does not match the graph
        being loaded or if the supplied uri does not point at a valid
        `QuantumGraph` save file.
    """
    with LoadHelper(file, minimumVersion) as loader:
        loaded = loader.load(universe, nodes, graphID)
    if isinstance(loaded, QuantumGraph):
        return loaded
    raise TypeError(f"QuantumGraph file contains unexpected object type: {type(loaded)}")

1213 

def iterTaskGraph(self) -> Generator[TaskDef, None, None]:
    """Iterate over the `taskGraph` attribute in topological order.

    Yields
    ------
    taskDef : `TaskDef`
        `TaskDef` objects in topological order.
    """
    for task in nx.topological_sort(self.taskGraph):
        yield task

1223 

def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_id: bool = False) -> None:
    """Change output run and dataset ID for each output dataset.

    Parameters
    ----------
    run : `str`
        New output run name.
    metadata_key : `str` or `None`
        Specifies metadata key corresponding to output run name to update
        with new run name. If `None` or if metadata is missing it is not
        updated. If metadata is present but key is missing, it will be
        added.
    update_graph_id : `bool`, optional
        If `True` then also update graph ID with a new unique value.
    """
    # Maps each rewritten output's old dataset ID to its new ID; filled
    # by the first pass and consulted by the second pass so intermediate
    # inputs stay consistent with the outputs that produce them.
    dataset_id_map: dict[DatasetId, DatasetId] = {}

    def _update_output_refs(
        refs: Iterable[DatasetRef], run: str, dataset_id_map: MutableMapping[DatasetId, DatasetId]
    ) -> Iterator[DatasetRef]:
        """Update a collection of `~lsst.daf.butler.DatasetRef` with new
        run and dataset IDs, recording old->new ID pairs as a side
        effect in ``dataset_id_map``.
        """
        for ref in refs:
            new_ref = ref.replace(run=run)
            dataset_id_map[ref.id] = new_ref.id
            yield new_ref

    def _update_intermediate_refs(
        refs: Iterable[DatasetRef], run: str, dataset_id_map: Mapping[DatasetId, DatasetId]
    ) -> Iterator[DatasetRef]:
        """Update intermediate references with new run and IDs. Only the
        references that appear in ``dataset_id_map`` are updated, others
        are returned unchanged.
        """
        for ref in refs:
            if dataset_id := dataset_id_map.get(ref.id):
                ref = ref.replace(run=run, id=dataset_id)
            yield ref

    # Replace quantum output refs first.  This pass must run to
    # completion before intermediates are touched, because it populates
    # dataset_id_map.
    for node in self._connectedQuanta:
        quantum = node.quantum
        outputs = {
            dataset_type: tuple(_update_output_refs(refs, run, dataset_id_map))
            for dataset_type, refs in quantum.outputs.items()
        }
        # Quantum is immutable; build a replacement with updated outputs.
        updated_quantum = Quantum(
            taskName=quantum.taskName,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=quantum.inputs,
            outputs=outputs,
            datastore_records=quantum.datastore_records,
        )
        node._replace_quantum(updated_quantum)

    self._initOutputRefs = {
        task_def: list(_update_output_refs(refs, run, dataset_id_map))
        for task_def, refs in self._initOutputRefs.items()
    }
    self._globalInitOutputRefs = list(
        _update_output_refs(self._globalInitOutputRefs, run, dataset_id_map)
    )

    # Update all intermediates from their matching outputs.
    for node in self._connectedQuanta:
        quantum = node.quantum
        inputs = {
            dataset_type: tuple(_update_intermediate_refs(refs, run, dataset_id_map))
            for dataset_type, refs in quantum.inputs.items()
        }
        initInputs = list(_update_intermediate_refs(quantum.initInputs.values(), run, dataset_id_map))

        updated_quantum = Quantum(
            taskName=quantum.taskName,
            dataId=quantum.dataId,
            initInputs=initInputs,
            inputs=inputs,
            outputs=quantum.outputs,
            datastore_records=quantum.datastore_records,
        )
        node._replace_quantum(updated_quantum)

    self._initInputRefs = {
        task_def: list(_update_intermediate_refs(refs, run, dataset_id_map))
        for task_def, refs in self._initInputRefs.items()
    }

    if update_graph_id:
        # New unique build ID derived from wall-clock time and PID.
        self._buildId = BuildId(f"{time.time()}-{os.getpid()}")

    # Update metadata if present.
    if self._metadata is not None and metadata_key is not None:
        metadata = dict(self._metadata)
        metadata[metadata_key] = run
        self._metadata = metadata

@property
def graphID(self) -> BuildId:
    """The ID generated by the graph at construction time (`str`)."""
    build_id = self._buildId
    return build_id

1326 

@property
def universe(self) -> DimensionUniverse:
    """Dimension universe associated with this graph
    (`~lsst.daf.butler.DimensionUniverse`).
    """
    dimension_universe = self._universe
    return dimension_universe

1333 

def __iter__(self) -> Generator[QuantumNode, None, None]:
    """Iterate over quantum nodes in topological (dependency) order."""
    for quantum_node in nx.topological_sort(self._connectedQuanta):
        yield quantum_node

1336 

def __len__(self) -> int:
    # Number of quantum nodes; cached at construction rather than
    # recomputed from the underlying graph.
    return self._count

1339 

def __contains__(self, node: QuantumNode) -> bool:
    # Membership delegates to the underlying networkx graph of quanta.
    return self._connectedQuanta.has_node(node)

1342 

def __getstate__(self) -> dict:
    """Store a compact form of the graph as a list of graph nodes, and a
    tuple of task labels and task configs. The full graph can be
    reconstructed with this information, and it preserves the ordering of
    the graph nodes.
    """
    universe: DimensionUniverse | None = None
    # Walk every node; the last node carrying a data ID determines the
    # universe (all nodes are expected to share one — TODO confirm).
    for node in self:
        data_id = node.quantum.dataId
        if data_id is not None:
            universe = data_id.graph.universe
    return {"reduced": self._buildSaveObject(), "graphId": self._buildId, "universe": universe}

1356 

def __setstate__(self, state: dict) -> None:
    """Reconstruct the state of the graph from the information persisted
    by `__getstate__`.
    """
    with LoadHelper(io.BytesIO(state["reduced"]), minimumVersion=3) as loader:
        restored = loader.load(state["universe"], graphID=state["graphId"])

    # Adopt the freshly-loaded graph's internals wholesale.
    self._metadata = restored._metadata
    self._buildId = restored._buildId
    self._datasetDict = restored._datasetDict
    self._nodeIdMap = restored._nodeIdMap
    self._count = len(restored)
    self._taskToQuantumNode = restored._taskToQuantumNode
    self._taskGraph = restored._taskGraph
    self._connectedQuanta = restored._connectedQuanta
    self._initInputRefs = restored._initInputRefs
    self._initOutputRefs = restored._initOutputRefs

1375 

def __eq__(self, other: object) -> bool:
    """Two graphs are equal when they hold the same nodes with the same
    connectivity, the same dataset types, and the same tasks.
    """
    if not isinstance(other, QuantumGraph) or len(self) != len(other):
        return False
    for node in self:
        if (
            node not in other
            or self.determineInputsToQuantumNode(node) != other.determineInputsToQuantumNode(node)
            or self.determineOutputsOfQuantumNode(node) != other.determineOutputsOfQuantumNode(node)
        ):
            return False
    return set(self.allDatasetTypes) == set(other.allDatasetTypes) and set(self.taskGraph) == set(
        other.taskGraph
    )