Coverage for python/lsst/pipe/base/quantum_graph_builder.py: 25%

369 statements  

coverage.py v7.3.2, created at 2023-10-11 09:32 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""The base class for the QuantumGraph-generation algorithm and various 

29helper classes. 

30""" 

31 

32from __future__ import annotations 

33 

34__all__ = ( 

35 "QuantumGraphBuilder", 

36 "ExistingDatasets", 

37 "QuantumGraphBuilderError", 

38 "OutputExistsError", 

39 "PrerequisiteMissingError", 

40) 

41 

42import dataclasses 

43from abc import ABC, abstractmethod 

44from collections.abc import Iterable, Mapping, Sequence 

45from typing import TYPE_CHECKING, Any, final 

46 

47from lsst.daf.butler import ( 

48 Butler, 

49 CollectionType, 

50 DataCoordinate, 

51 DatasetRef, 

52 DatasetType, 

53 DimensionUniverse, 

54 NamedKeyDict, 

55 NamedKeyMapping, 

56 Quantum, 

57) 

58from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError 

59from lsst.utils.logging import LsstLogAdapter, getLogger 

60from lsst.utils.timer import timeMethod 

61 

62from . import automatic_connection_constants as acc 

63from ._status import NoWorkFound 

64from ._task_metadata import TaskMetadata 

65from .connections import AdjustQuantumHelper 

66from .graph import QuantumGraph 

67from .pipeline_graph import PipelineGraph, TaskNode 

68from .prerequisite_helpers import PrerequisiteInfo, SkyPixBoundsBuilder, TimespanBuilder 

69from .quantum_graph_skeleton import ( 

70 DatasetKey, 

71 PrerequisiteDatasetKey, 

72 QuantumGraphSkeleton, 

73 QuantumKey, 

74 TaskInitKey, 

75) 

76 

77if TYPE_CHECKING: 

78 from .pipeline import TaskDef 

79 

80 

81class QuantumGraphBuilderError(Exception): 

82 """Base class for exceptions generated by QuantumGraphBuilder.""" 

83 

84 pass 

85 

86 

87class GraphBuilderError(QuantumGraphBuilderError): 

88 """Backwards-compatibility near-alias for QuantumGraphBuilderError.""" 

89 

90 pass 

91 

92 

93# Inherit from backwards-compatibility alias for backwards-compatibility. 

94class OutputExistsError(GraphBuilderError): 

95 """Exception generated when output datasets already exist.""" 

96 

97 pass 

98 

99 

100# Inherit from backwards-compatibility alias for backwards-compatibility. 

101class PrerequisiteMissingError(GraphBuilderError): 

102 """Exception generated when a prerequisite dataset does not exist.""" 

103 

104 pass 

105 

106 

107class InitInputMissingError(QuantumGraphBuilderError): 

108 """Exception generated when an init-input dataset does not exist.""" 

109 

110 pass 

111 

112 

113class QuantumGraphBuilder(ABC): 

114 """An abstract base class for building `QuantumGraph` objects from a 

115 pipeline. 

116 

117 Parameters 

118 ---------- 

119 pipeline_graph : `.pipeline_graph.PipelineGraph` 

120 Pipeline to build a `QuantumGraph` from, as a graph. Will be resolved 

121 in-place with the given butler (any existing resolution is ignored). 

122 butler : `lsst.daf.butler.Butler` 

123 Client for the data repository. Should be read-only. 

124 input_collections : `~collections.abc.Sequence` [ `str` ], optional 

125 Collections to search for overall-input datasets. If not provided, 

126 ``butler.collections`` is used (and must not be empty). 

127 output_run : `str`, optional 

128 Output `~lsst.daf.butler.CollectionType.RUN` collection. If not 

129 provided, ``butler.run`` is used (and must not be `None`). 

130 skip_existing_in : `~collections.abc.Sequence` [ `str` ], optional 

131 Collections to search for outputs that already exist for the purpose of 

132 skipping quanta that have already been run. 

133 clobber : `bool`, optional 

134 Whether to raise if predicted outputs already exist in ``output_run`` 

135 (not including those quanta that would be skipped because they've 

136 already been run). This never actually clobbers outputs; it just 

137 informs the graph generation algorithm whether execution will run with 

138 clobbering enabled. This is ignored if ``output_run`` does not exist. 

139 

140 Notes 

141 ----- 

142 Constructing a `QuantumGraphBuilder` will run queries for existing datasets 

143 with empty data IDs (including but not limited to init inputs and outputs), 

144 in addition to resolving the given pipeline graph and testing for existence 

145 of the ``output_run`` collection. 

146 

147 The `build` method splits the pipeline graph into independent subgraphs, 

148 then calls the abstract method `process_subgraph` on each, to allow 

149 concrete implementations to populate the rough graph structure (the 

150 `~quantum_graph_skeleton.QuantumGraphSkeleton` class) and search for 

151 existing datasets (further populating the builder's `existing_datasets` 

152 struct). The `build` method then: 

153 

154 - assembles `lsst.daf.butler.Quantum` instances from all data IDs in the 

155 skeleton; 

156 - looks for existing outputs found in ``skip_existing_in`` to see if any 

157 quanta should be skipped; 

158 - calls `PipelineTaskConnections.adjustQuantum` on all quanta, adjusting 

159 downstream quanta appropriately when preliminary predicted outputs are 

160 rejected (pruning nodes that will not have the inputs they need to run); 

161 - attaches datastore records and registry dataset types to the graph. 

162 

163 In addition to implementing `process_subgraph`, derived classes are 

164 generally expected to add new construction keyword-only arguments to 

165 control the data IDs of the quantum graph, while forwarding all of the 

166 arguments defined in the base class to `super`. 

167 """ 

168 

169 def __init__( 

170 self, 

171 pipeline_graph: PipelineGraph, 

172 butler: Butler, 

173 *, 

174 input_collections: Sequence[str] | None = None, 

175 output_run: str | None = None, 

176 skip_existing_in: Sequence[str] = (), 

177 clobber: bool = False, 

178 ): 

179 self.log = getLogger(__name__) 

180 self.metadata = TaskMetadata() 

181 self._pipeline_graph = pipeline_graph 

182 self.butler = butler 

183 self._pipeline_graph.resolve(self.butler.registry) 

184 if input_collections is None: 

185 input_collections = butler.collections 

186 if not input_collections: 

187 raise ValueError("No input collections provided.") 

188 self.input_collections = input_collections 

189 if output_run is None: 

190 output_run = butler.run 

191 if not output_run: 

192 raise ValueError("No output RUN collection provided.") 

193 self.output_run = output_run 

194 self.skip_existing_in = skip_existing_in 

195 self.empty_data_id = DataCoordinate.makeEmpty(butler.dimensions) 

196 self.clobber = clobber 

197 # See whether the output run already exists. 

198 self.output_run_exists = False 

199 try: 

200 if self.butler.registry.getCollectionType(self.output_run) is not CollectionType.RUN: 

201 raise RuntimeError(f"{self.output_run!r} is not a RUN collection.") 

202 self.output_run_exists = True 

203 except MissingCollectionError: 

204 # If the run doesn't exist we never need to clobber. This is not 

205 # an error so you can run with clobber=True the first time you 

206 # attempt some processing as well as all subsequent times, instead 

207 # of forcing the user to make the first attempt different. 

208 self.clobber = False 

209 # We need to know whether the skip_existing_in collection sequence 

210 # starts with the output run collection, as an optimization to avoid 

211 # queries later. 

212 try: 

213 skip_existing_in_flat = self.butler.registry.queryCollections( 

214 self.skip_existing_in, flattenChains=True 

215 ) 

216 except MissingCollectionError: 

217 skip_existing_in_flat = [] 

218 if not skip_existing_in_flat: 

219 self.skip_existing_in = [] 

220 if self.skip_existing_in and self.output_run_exists: 

221 self.skip_existing_starts_with_output_run = self.output_run == skip_existing_in_flat[0] 

222 else: 

223 self.skip_existing_starts_with_output_run = False 

224 self.existing_datasets = ExistingDatasets() 

225 try: 

226 packages_storage_class = butler.registry.getDatasetType( 

227 acc.PACKAGES_INIT_OUTPUT_NAME 

228 ).storageClass_name 

229 except MissingDatasetTypeError: 

230 packages_storage_class = acc.PACKAGES_INIT_OUTPUT_STORAGE_CLASS 

231 self._global_init_output_types = { 

232 acc.PACKAGES_INIT_OUTPUT_NAME: DatasetType( 

233 acc.PACKAGES_INIT_OUTPUT_NAME, 

234 self.universe.empty, 

235 packages_storage_class, 

236 ) 

237 } 

238 self._find_empty_dimension_datasets() 

239 self.prerequisite_info = { 

240 task_node.label: PrerequisiteInfo(task_node, self._pipeline_graph) 

241 for task_node in pipeline_graph.tasks.values() 

242 } 

243 

244 log: LsstLogAdapter 

245 """Logger to use for all quantum-graph generation messages. 

246 

247 General and per-task status messages should be logged at `~logging.INFO` 

248 level or higher, per-dataset-type status messages should be logged at 

249 `~lsst.utils.logging.VERBOSE` or higher, and per-data-ID status messages 

250 should be logged at `~logging.DEBUG` or higher. 

251 """ 

252 

253 metadata: TaskMetadata 

254 """Metadata to store in the QuantumGraph. 

255 

256 The `TaskMetadata` class is used here primarily in order to enable 

257 resource-usage collection with the `lsst.utils.timer.timeMethod` decorator. 

258 """ 

259 

260 butler: Butler 

261 """Client for the data repository. 

262 

263 Should be read-only. 

264 """ 

265 

266 input_collections: Sequence[str] 

267 """Collections to search for overall-input datasets. 

268 """ 

269 

270 output_run: str 

271 """Output `~lsst.daf.butler.CollectionType.RUN` collection. 

272 """ 

273 

274 skip_existing_in: Sequence[str] 

275 """Collections to search for outputs that already exist for the purpose 

276 of skipping quanta that have already been run. 

277 """ 

278 

279 clobber: bool 

280 """Whether to raise if predicted outputs already exist in ``output_run`` 

281 

282 This never actually clobbers outputs; it just informs the graph generation 

283 algorithm whether execution will run with clobbering enabled. This is 

284 always `False` if `output_run_exists` is `False`. 

285 """ 

286 

287 empty_data_id: DataCoordinate 

288 """An empty data ID in the data repository's dimension universe. 

289 """ 

290 

291 output_run_exists: bool 

292 """Whether the output run exists in the data repository already. 

293 """ 

294 

295 skip_existing_starts_with_output_run: bool 

296 """Whether the `skip_existing_in` sequence begins with `output_run`. 

297 

298 If this is true, any dataset found in `output_run` can be used to 

299 short-circuit queries in `skip_existing_in`. 

300 """ 

301 
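
For example, with the hypothetical collections below (and assuming the output run already exists, since this flag is only set when `output_run_exists` is true), the builder can treat any dataset it finds in the output run as also satisfying the skip-existing search:

```python
# MinimalQuantumGraphBuilder is the sketch subclass shown earlier;
# collection names are hypothetical.
builder = MinimalQuantumGraphBuilder(
    pipeline_graph,
    butler,
    output_run="u/someone/example-run",
    skip_existing_in=["u/someone/example-run", "u/someone/older-runs"],
)
assert builder.skip_existing_starts_with_output_run
```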

302 existing_datasets: ExistingDatasets 

303 """Struct holding datasets that have already been found in the data 

304 repository. 

305 

306 This is updated in-place as the `QuantumGraph` generation algorithm 

307 proceeds. 

308 """ 

309 

310 prerequisite_info: Mapping[str, PrerequisiteInfo] 

311 """Helper objects for finding prerequisite inputs, organized by task label. 

312 

313 Subclasses that find prerequisites should remove the 

314 covered `~prerequisite_helpers.PrerequisiteFinder` objects from this 

315 attribute. 

316 """ 

317 

318 @property 

319 def universe(self) -> DimensionUniverse: 

320 """Definitions of all data dimensions.""" 

321 return self.butler.dimensions 

322 

323 @final 

324 @timeMethod 

325 def build(self, metadata: Mapping[str, Any] | None = None) -> QuantumGraph: 

326 """Build the quantum graph. 

327 

328 Parameters 

329 ---------- 

330 metadata : `~collections.abc.Mapping`, optional 

331 Flexible metadata to add to the quantum graph. 

332 

333 Returns 

334 ------- 

335 quantum_graph : `QuantumGraph` 

336 DAG describing processing to be performed. 

337 

338 Notes 

339 ----- 

340 External code is expected to construct a `QuantumGraphBuilder` and then 

341 call this method exactly once. See class documentation for details on 

342 what it does. 

343 """ 

344 full_skeleton = QuantumGraphSkeleton(self._pipeline_graph.tasks) 

345 subgraphs = list(self._pipeline_graph.split_independent()) 

346 for i, subgraph in enumerate(subgraphs): 

347 self.log.info( 

348 "Processing pipeline subgraph %d of %d with %d task(s).", 

349 i + 1, 

350 len(subgraphs), 

351 len(subgraph.tasks), 

352 ) 

353 self.log.verbose("Subgraph tasks: [%s]", ", ".join(label for label in subgraph.tasks)) 

354 subgraph_skeleton = self.process_subgraph(subgraph) 

355 full_skeleton.update(subgraph_skeleton) 

356 # Loop over tasks. The pipeline graph must be topologically sorted, 

357 # so a quantum is only processed after any quantum that provides its 

358 # inputs has been processed. 

359 for task_node in self._pipeline_graph.tasks.values(): 

360 self._resolve_task_quanta(task_node, full_skeleton) 

361 # Add global init-outputs to the skeleton. 

362 for dataset_type in self._global_init_output_types.values(): 

363 dataset_key = full_skeleton.add_dataset_node( 

364 dataset_type.name, self.empty_data_id, is_global_init_output=True 

365 ) 

366 ref = self.existing_datasets.outputs_in_the_way.get(dataset_key) 

367 if ref is None: 

368 ref = DatasetRef(dataset_type, self.empty_data_id, run=self.output_run) 

369 full_skeleton[dataset_key]["ref"] = ref 

370 # Remove dataset nodes with no edges that are not global init outputs, 

371 # which are generally overall-inputs whose original quanta end up 

372 # skipped or with no work to do (we can't remove these along with the 

373 # quanta because no quantum knows if it's the only consumer). 

374 full_skeleton.remove_orphan_datasets() 

375 self._attach_datastore_records(full_skeleton) 

376 # TODO initialize most metadata here instead of in ctrl_mpexec. 

377 if metadata is None: 

378 metadata = {} 

379 return self._construct_quantum_graph(full_skeleton, metadata) 

380 

381 @abstractmethod 

382 def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton: 

383 """Build the rough structure for an independent subset of the 

384 `QuantumGraph` and query for relevant existing datasets. 

385 

386 Parameters 

387 ---------- 

388 subgraph : `.pipeline_graph.PipelineGraph` 

389 Subset of the pipeline graph that should be processed by this call. 

390 This is always resolved and topologically sorted. It should not be 

391 modified. 

392 

393 Returns 

394 ------- 

395 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

396 Class representing an initial quantum graph. See 

397 `quantum_graph_skeleton.QuantumGraphSkeleton` docs for details. 

398 After this is returned, the object may be modified in-place in 

399 unspecified ways. 

400 

401 Notes 

402 ----- 

403 In addition to returning a 

404 `quantum_graph_skeleton.QuantumGraphSkeleton`, this method should 

405 populate the `existing_datasets` structure by querying for all relevant 

406 datasets with non-empty data IDs (those with empty data IDs will 

407 already be present). In particular: 

408 

409 - `~ExistingDatasets.inputs` must always be populated with all 

410 overall-input datasets (but not prerequisites), by querying 

411 `input_collections`; 

412 - `~ExistingDatasets.outputs_for_skip` must be populated with any 

413 intermediate or output datasets present in `skip_existing_in` (it 

414 can be ignored if `skip_existing_in` is empty); 

415 - `~ExistingDatasets.outputs_in_the_way` must be populated with any 

416 intermediate or output datasets present in `output_run`, if 

417 `output_run_exists` (it can be ignored if `output_run_exists` is 

418 `False`). Note that the presence of such datasets is not 

419 automatically an error, even if `clobber` is `False`, as these may be 

420 quanta that will be skipped. 

421 - `~ExistingDatasets.inputs` must be populated with all 

422 prerequisite-input datasets that were included in the skeleton, by 

423 querying `input_collections` (not all prerequisite inputs need to be 

424 included in the skeleton, but the base class can only use per-quantum 

425 queries to find them, and that can be slow when there are many 

426 quanta). 

427 

428 Dataset types should never be components and should always use the 

429 "common" storage class definition in `pipeline_graph.DatasetTypeNode` 

430 (which is the data repository definition when the dataset type is 

431 registered). 

432 """ 

433 raise NotImplementedError() 

434 
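
A schematic fragment of the population contract described above, written as part of a hypothetical `process_subgraph` implementation; the dataset type name is a placeholder, and the single query shown stands in for whatever bulk queries a real subclass would run:

```python
from lsst.pipe.base.quantum_graph_skeleton import DatasetKey, QuantumGraphSkeleton


def process_subgraph(self, subgraph):
    skeleton = QuantumGraphSkeleton(subgraph.tasks)
    ...  # add quantum and dataset nodes for this subgraph
    # Record every found overall-input dataset of one (hypothetical)
    # type so that _gather_quantum_inputs can resolve refs later.
    for ref in self.butler.registry.queryDatasets(
        "example_input",
        collections=self.input_collections,
        findFirst=True,
    ):
        key = DatasetKey("example_input", ref.dataId.values_tuple())
        self.existing_datasets.inputs[key] = ref
    return skeleton
```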

435 @final 

436 @timeMethod 

437 def _resolve_task_quanta(self, task_node: TaskNode, skeleton: QuantumGraphSkeleton) -> None: 

438 """Process the quanta for one task in a skeleton graph to skip those 

439 that have already completed and adjust those that request it. 

440 

441 Parameters 

442 ---------- 

443 task_node : `pipeline_graph.TaskNode` 

444 Node for this task in the pipeline graph. 

445 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

446 Preliminary quantum graph, to be modified in-place. 

447 

448 Notes 

449 ----- 

450 This method modifies ``skeleton`` in-place in several ways: 

451 

452 - It adds a "ref" attribute to dataset nodes, using the contents of 

453 `existing_datasets`. This ensures producing and consuming tasks 

454 start from the same `DatasetRef`. 

455 - It adds "inputs", "outputs", and "init_inputs" attributes to the 

456 quantum nodes, holding the same `NamedKeyDict` objects needed to 

457 construct actual `Quantum` instances. 

458 - It removes quantum nodes that are to be skipped because their outputs 

459 already exist in `skip_existing_in`. It also removes their outputs 

460 from `ExistingDatasets.outputs_in_the_way`. 

461 - It adds prerequisite dataset nodes and edges that connect them to the 

462 quanta that consume them. 

463 - It removes quantum nodes whose 

464 `~PipelineTaskConnections.adjustQuantum` calls raise `NoWorkFound` or 

465 predict no outputs; 

466 - It removes the nodes of output datasets that are "adjusted away". 

467 - It removes the edges of input datasets that are "adjusted away". 

468 

469 The difference between how adjusted inputs and outputs are handled 

470 reflects the fact that many quanta can share the same input, but only 

471 one produces each output. This can lead to the graph having 

472 superfluous isolated nodes after processing is complete, but these 

473 should only be removed after all the quanta from all tasks have been 

474 processed. 

475 """ 

476 # Extract the helper object for the prerequisite inputs of this task, 

477 # and tell it to prepare to construct skypix bounds and timespans for 

478 # each quantum (these will automatically do nothing if nothing needs 

479 # those bounds). 

480 task_prerequisite_info = self.prerequisite_info[task_node.label] 

481 task_prerequisite_info.update_bounds() 

482 # Loop over all quanta for this task, remembering the ones we've 

483 # gotten rid of. 

484 skipped_quanta = [] 

485 no_work_quanta = [] 

486 for quantum_key in skeleton.get_quanta(task_node.label): 

487 if self._skip_quantum_if_metadata_exists(task_node, quantum_key, skeleton): 

488 skipped_quanta.append(quantum_key) 

489 continue 

490 quantum_data_id = skeleton[quantum_key]["data_id"] 

491 skypix_bounds_builder = task_prerequisite_info.bounds.make_skypix_bounds_builder(quantum_data_id) 

492 timespan_builder = task_prerequisite_info.bounds.make_timespan_builder(quantum_data_id) 

493 adjusted_outputs = self._gather_quantum_outputs( 

494 task_node, quantum_key, skeleton, skypix_bounds_builder, timespan_builder 

495 ) 

496 adjusted_inputs = self._gather_quantum_inputs( 

497 task_node, 

498 quantum_key, 

499 skeleton, 

500 task_prerequisite_info, 

501 skypix_bounds_builder, 

502 timespan_builder, 

503 ) 

504 # Give the task's Connections class an opportunity to remove 

505 # some inputs, or complain if they are unacceptable. This will 

506 # raise if one of the check conditions is not met, which is the 

507 # intended behavior. 

508 helper = AdjustQuantumHelper(inputs=adjusted_inputs, outputs=adjusted_outputs) 

509 try: 

510 helper.adjust_in_place( 

511 task_node._get_imported_data().connections, task_node.label, quantum_data_id 

512 ) 

513 except NoWorkFound as err: 

514 # Do not generate this quantum; it would not produce any 

515 # outputs. Remove it and all of the outputs it might have 

516 # produced from the skeleton. 

517 try: 

518 _, connection_name, _ = err.args 

519 details = f"not enough datasets for connection {connection_name}." 

520 except ValueError: 

521 details = str(err) 

522 self.log.debug( 

523 "No work found for quantum %s of task %s: %s", 

524 quantum_key.data_id_values, 

525 quantum_key.task_label, 

526 details, 

527 ) 

528 no_work_quanta.append(quantum_key) 

529 continue 

530 if helper.outputs_adjusted: 

531 if not any(adjusted_refs for adjusted_refs in helper.outputs.values()): 

532 # No outputs also means we don't generate this quantum. 

533 self.log.debug( 

534 "No outputs predicted for quantum %s of task %s.", 

535 quantum_key.data_id_values, 

536 quantum_key.task_label, 

537 ) 

538 no_work_quanta.append(quantum_key) 

539 continue 

540 # Remove output nodes that were not retained by 

541 # adjustQuantum. 

542 skeleton.remove_dataset_nodes( 

543 self._find_removed(skeleton.iter_outputs_of(quantum_key), helper.outputs) 

544 ) 

545 if helper.inputs_adjusted: 

546 if not any(bool(adjusted_refs) for adjusted_refs in helper.inputs.values()): 

547 raise QuantumGraphBuilderError( 

548 f"adjustQuantum implementation for {task_node.label}@{quantum_key.data_id_values} " 

549 "returned outputs but no inputs." 

550 ) 

551 # Remove input dataset edges that were not retained by 

552 # adjustQuantum. We can't remove the input dataset nodes 

553 # because some other quantum might still want them. 

554 skeleton.remove_input_edges( 

555 quantum_key, self._find_removed(skeleton.iter_inputs_of(quantum_key), helper.inputs) 

556 ) 

557 # Save the adjusted inputs and outputs to the quantum node's 

558 # state so we don't have to regenerate those data structures 

559 # from the graph. 

560 skeleton[quantum_key]["inputs"] = helper.inputs 

561 skeleton[quantum_key]["outputs"] = helper.outputs 

562 for no_work_quantum in no_work_quanta: 

563 skeleton.remove_quantum_node(no_work_quantum, remove_outputs=True) 

564 for skipped_quantum in skipped_quanta: 

565 skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False) 

566 remaining_quanta = skeleton.get_quanta(task_node.label) 

567 self._resolve_task_init(task_node, skeleton, bool(skipped_quanta)) 

568 message_terms = [] 

569 if no_work_quanta: 

570 message_terms.append(f"{len(no_work_quanta)} had no work to do") 

571 if skipped_quanta: 

572 message_terms.append(f"{len(skipped_quanta)} previously succeeded") 

573 message_parenthetical = f" ({', '.join(message_terms)})" if message_terms else "" 

574 if remaining_quanta: 

575 self.log.info( 

576 "Generated %s for task %s%s.", 

577 _quantum_or_quanta(len(remaining_quanta)), 

578 task_node.label, 

579 message_parenthetical, 

580 ) 

581 else: 

582 self.log.info( 

583 "Dropping task %s because no quanta remain%s.", task_node.label, message_parenthetical 

584 ) 

585 skeleton.remove_task(task_node.label) 

586 
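
The `NoWorkFound` pruning handled above originates in each task's connections class. A hedged sketch of an `adjustQuantum` override that triggers it follows; the class and connection names are hypothetical, but the signature and the shape of the ``inputs`` mapping follow `PipelineTaskConnections.adjustQuantum`:

```python
import lsst.pipe.base.connectionTypes as cT
from lsst.pipe.base import NoWorkFound, PipelineTaskConnections


class ExampleConnections(PipelineTaskConnections, dimensions=("visit", "detector")):
    images = cT.Input(
        name="example_image",
        storageClass="ExposureF",
        dimensions=("visit", "detector"),
        multiple=True,
        doc="Hypothetical input connection.",
    )

    def adjustQuantum(self, inputs, outputs, label, data_id):
        # inputs maps connection name -> (connection, [DatasetRef, ...]).
        _, refs = inputs["images"]
        if not refs:
            # _resolve_task_quanta catches this and drops the quantum.
            raise NoWorkFound("No example_image datasets for this quantum.")
        return super().adjustQuantum(inputs, outputs, label, data_id)
```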

587 def _skip_quantum_if_metadata_exists( 

588 self, task_node: TaskNode, quantum_key: QuantumKey, skeleton: QuantumGraphSkeleton 

589 ) -> bool: 

590 """Identify and drop quanta that should be skipped because their 

591 metadata datasets already exist. 

592 

593 Parameters 

594 ---------- 

595 task_node : `pipeline_graph.TaskNode` 

596 Node for this task in the pipeline graph. 

597 quantum_key : `QuantumKey` 

598 Identifier for this quantum in the graph. 

599 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

600 Preliminary quantum graph, to be modified in-place. 

601 

602 Returns 

603 ------- 

604 skipped : `bool` 

605 `True` if the quantum is being skipped and has been removed from 

606 the graph, `False` otherwise. 

607 

608 Notes 

609 ----- 

610 If the metadata dataset for this quantum exists in 

611 `ExistingDatasets.outputs_for_skip`, the quantum will be skipped. This 

612 causes the quantum node to be removed from the graph. Dataset nodes 

613 that were previously the outputs of this quantum will have their "ref" 

614 attribute set from `ExistingDatasets.outputs_for_skip`, or will be 

615 removed if there is no such dataset there. Any output dataset in 

616 `ExistingDatasets.outputs_in_the_way` will be removed. 

617 """ 

618 metadata_dataset_key = DatasetKey( 

619 task_node.metadata_output.parent_dataset_type_name, quantum_key.data_id_values 

620 ) 

621 if metadata_dataset_key in self.existing_datasets.outputs_for_skip: 

622 # This quantum's metadata is already present in the 

623 # skip_existing_in collections; we'll skip it. But the presence of 

624 # the metadata dataset doesn't guarantee that all of the other 

625 # outputs we predicted are present; we have to check. 

626 for output_dataset_key in list(skeleton.iter_outputs_of(quantum_key)): 

627 if ( 

628 output_ref := self.existing_datasets.outputs_for_skip.get(output_dataset_key) 

629 ) is not None: 

630 # Populate the skeleton graph's node attributes 

631 # with the existing DatasetRef, just like a 

632 # predicted output of a non-skipped quantum. 

633 skeleton[output_dataset_key]["ref"] = output_ref 

634 else: 

635 # Remove this dataset from the skeleton graph, 

636 # because the quantum that would have produced it 

637 # is being skipped and it doesn't already exist. 

638 skeleton.remove_dataset_nodes([output_dataset_key]) 

639 # If this dataset was "in the way" (i.e. already in the 

640 # output run), it isn't anymore. 

641 self.existing_datasets.outputs_in_the_way.pop(output_dataset_key, None) 

642 # Removing the quantum node from the graph will happen outside this 

643 # function. 

644 return True 

645 return False 

646 
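
The gating metadata dataset follows the automatic ``{label}_metadata`` naming convention, assuming the `METADATA_OUTPUT_TEMPLATE` constant in `automatic_connection_constants`:

```python
from lsst.pipe.base import automatic_connection_constants as acc

# For a task labeled "isr" (hypothetical), the dataset checked in
# outputs_for_skip is named "isr_metadata".
print(acc.METADATA_OUTPUT_TEMPLATE.format(label="isr"))
```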

647 @final 

648 def _gather_quantum_outputs( 

649 self, 

650 task_node: TaskNode, 

651 quantum_key: QuantumKey, 

652 skeleton: QuantumGraphSkeleton, 

653 skypix_bounds_builder: SkyPixBoundsBuilder, 

654 timespan_builder: TimespanBuilder, 

655 ) -> NamedKeyDict[DatasetType, list[DatasetRef]]: 

656 """Collect outputs or generate datasets for a preliminary quantum and 

657 put them in the form used by `~lsst.daf.butler.Quantum` and 

658 `~PipelineTaskConnections.adjustQuantum`. 

659 

660 Parameters 

661 ---------- 

662 task_node : `pipeline_graph.TaskNode` 

663 Node for this task in the pipeline graph. 

664 quantum_key : `QuantumKey` 

665 Identifier for this quantum in the graph. 

666 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

667 Preliminary quantum graph, to be modified in-place. 

668 skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder` 

669 An object that accumulates the appropriate spatial bounds for a 

670 quantum. 

671 timespan_builder : `~prerequisite_helpers.TimespanBuilder` 

672 An object that accumulates the appropriate timespan for a quantum. 

673 

674 Returns 

675 ------- 

676 outputs : `~lsst.daf.butler.NamedKeyDict` [ \ 

677 `~lsst.daf.butler.DatasetType`, `list` [ \ 

678 `~lsst.daf.butler.DatasetRef` ] ] 

679 All outputs of the task, using the storage class and components 

680 defined by the task's own connections. 

681 

682 Notes 

683 ----- 

684 This first looks for outputs already present in the `output_run` by 

685 looking in `ExistingDatasets.outputs_in_the_way`; if it finds something 

686 and `clobber` is `True`, it uses that ref (it's not ideal that both the 

687 original dataset and its replacement will have the same UUID, but we 

688 don't have space in the quantum graph for two UUIDs, and we need the 

689 datastore records of the original there). If `clobber` is `False`, 

690 `OutputExistsError` is raised. If there is no output already present, a new 

691 one with a random UUID is generated. In all cases the "ref" attribute 

692 of the dataset node in the skeleton is set. 

693 """ 

694 outputs_by_type: dict[str, list[DatasetRef]] = {} 

695 dataset_key: DatasetKey 

696 for dataset_key in skeleton.iter_outputs_of(quantum_key): 

697 dataset_data_id = skeleton[dataset_key]["data_id"] 

698 dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name] 

699 if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None: 

700 ref = DatasetRef(dataset_type_node.dataset_type, dataset_data_id, run=self.output_run) 

701 elif not self.clobber: 

702 # We intentionally raise here, before running adjustQuantum, 

703 # because it'd be weird if we left an old potential output of a 

704 # task sitting there in the output collection, just because the 

705 # task happened to not actually produce it. 

706 raise OutputExistsError( 

707 f"Potential output dataset {ref} already exists in the output run " 

708 f"{self.output_run}, but clobbering outputs was not expected to be necessary." 

709 ) 

710 skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id) 

711 timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id) 

712 skeleton[dataset_key]["ref"] = ref 

713 outputs_by_type.setdefault(dataset_key.parent_dataset_type_name, []).append(ref) 

714 adapted_outputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict() 

715 for write_edge in task_node.iter_all_outputs(): 

716 dataset_type_node = self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name] 

717 edge_dataset_type = write_edge.adapt_dataset_type(dataset_type_node.dataset_type) 

718 adapted_outputs[edge_dataset_type] = [ 

719 write_edge.adapt_dataset_ref(ref) 

720 for ref in sorted(outputs_by_type.get(write_edge.parent_dataset_type_name, [])) 

721 ] 

722 return adapted_outputs 

723 

724 @final 

725 def _gather_quantum_inputs( 

726 self, 

727 task_node: TaskNode, 

728 quantum_key: QuantumKey, 

729 skeleton: QuantumGraphSkeleton, 

730 task_prerequisite_info: PrerequisiteInfo, 

731 skypix_bounds_builder: SkyPixBoundsBuilder, 

732 timespan_builder: TimespanBuilder, 

733 ) -> NamedKeyDict[DatasetType, list[DatasetRef]]: 

734 """Collect input datasets for a preliminary quantum and put them in the 

735 form used by `~lsst.daf.butler.Quantum` and 

736 `~PipelineTaskConnections.adjustQuantum`. 

737 

738 Parameters 

739 ---------- 

740 task_node : `pipeline_graph.TaskNode` 

741 Node for this task in the pipeline graph. 

742 quantum_key : `QuantumKey` 

743 Identifier for this quantum in the graph. 

744 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

745 Preliminary quantum graph, to be modified in-place. 

746 skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder` 

747 An object that accumulates the appropriate spatial bounds for a 

748 quantum. 

749 timespan_builder : `~prerequisite_helpers.TimespanBuilder` 

750 An object that accumulates the appropriate timespan for a quantum. 

751 

752 Returns 

753 ------- 

754 inputs : `~lsst.daf.butler.NamedKeyDict` [ \ 

755 `~lsst.daf.butler.DatasetType`, `list` [ \ 

756 `~lsst.daf.butler.DatasetRef` ] ] 

757 All regular and prerequisite inputs to the task, using the storage 

758 class and components defined by the task's own connections. 

759 

760 Notes 

761 ----- 

762 On return, the dataset nodes that represent inputs to this quantum will 

763 either have their "ref" attribute set (using the common dataset type, 

764 not the task-specific one) or will be removed from the graph. 

765 

766 For regular inputs, usually an existing "ref" (corresponding to an 

767 output of another quantum) will be found and left unchanged. When 

768 there is no existing "ref" attribute, `ExistingDatasets.inputs` is 

769 searched next; if there is nothing there, the input will be removed. 

770 

771 Prerequisite inputs are always queried for directly here (delegating to 

772 `_find_prerequisite_inputs`). They are never produced by other tasks, 

773 and cannot in general be queried for in advance when 

774 `ExistingDatasets.inputs` is populated. 

775 """ 

776 quantum_data_id = skeleton[quantum_key]["data_id"] 

777 inputs_by_type: dict[str, set[DatasetRef]] = {} 

778 dataset_key: DatasetKey | PrerequisiteDatasetKey 

779 # Process inputs already present in the skeleton - this should include 

780 # all regular inputs (including intermediates) and may include some 

781 # prerequisites. 

782 for dataset_key in list(skeleton.iter_inputs_of(quantum_key)): 

783 if (ref := skeleton[dataset_key].get("ref")) is None: 

784 # This dataset is an overall input - if it was an intermediate, 

785 # we would have already either removed the node or set the 

786 # "ref" attribute when processing its producing quantum - and 

787 # this is the first time we're trying to resolve it. 

788 if (ref := self.existing_datasets.inputs.get(dataset_key)) is None: 

789 # It also doesn't exist in the input collections, so we 

790 # remove its node in the skeleton graph (so other consumers 

791 # won't have to check for it). 

792 skeleton.remove_dataset_nodes([dataset_key]) 

793 continue 

794 skeleton[dataset_key]["ref"] = ref 

795 inputs_by_type.setdefault(dataset_key.parent_dataset_type_name, set()).add(ref) 

796 skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId) 

797 timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId) 

798 # Query for any prerequisites not handled by process_subgraph. Note 

799 # that these were not already in the skeleton graph, so we add them 

800 # now. 

801 skypix_bounds = skypix_bounds_builder.finish() 

802 timespan = timespan_builder.finish() 

803 for finder in task_prerequisite_info.finders.values(): 

804 inputs_for_type = inputs_by_type.setdefault(finder.dataset_type_node.name, set()) 

805 dataset_keys = [] 

806 for ref in finder.find( 

807 self.butler, self.input_collections, quantum_data_id, skypix_bounds, timespan 

808 ): 

809 dataset_key = skeleton.add_prerequisite_node(ref.datasetType.name, ref=ref) 

810 dataset_keys.append(dataset_key) 

811 inputs_for_type.add(ref) 

812 skeleton.add_input_edges(quantum_key, dataset_keys) 

813 adapted_inputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict() 

814 for read_edge in task_node.iter_all_inputs(): 

815 dataset_type_node = self._pipeline_graph.dataset_types[read_edge.parent_dataset_type_name] 

816 edge_dataset_type = read_edge.adapt_dataset_type(dataset_type_node.dataset_type) 

817 if (current_dataset_type := adapted_inputs.keys().get(edge_dataset_type.name)) is None: 

818 adapted_inputs[edge_dataset_type] = [ 

819 read_edge.adapt_dataset_ref(ref) 

820 for ref in sorted(inputs_by_type.get(read_edge.parent_dataset_type_name, frozenset())) 

821 ] 

822 elif current_dataset_type != edge_dataset_type: 

823 raise NotImplementedError( 

824 f"Task {task_node.label!r} has {edge_dataset_type.name!r} as an input via " 

825 "two different connections, with two different storage class overrides. " 

826 "This is not yet supported due to limitations in the Quantum data structure." 

827 ) 

828 # If neither the `if` nor the `elif` above match, it means 

829 # multiple input connections have exactly the same dataset 

830 # type, and hence nothing to do after the first one. 

831 return adapted_inputs 

832 

833 @final 

834 def _resolve_task_init( 

835 self, task_node: TaskNode, skeleton: QuantumGraphSkeleton, has_skipped_quanta: bool 

836 ) -> None: 

837 """Add init-input and init-output dataset nodes and edges for a task to 

838 the skeleton. 

839 

840 Parameters 

841 ---------- 

842 task_node : `pipeline_graph.TaskNode` 

843 Pipeline graph description of the task. 

844 skeleton : `QuantumGraphSkeleton` 

845 In-progress quantum graph data structure to update in-place. 

846 has_skipped_quanta : `bool` 

847 Whether any of this task's quanta were skipped because they had 

848 already succeeded. 

849 """ 

850 quanta = skeleton.get_quanta(task_node.label) 

851 task_init_key = TaskInitKey(task_node.label) 

852 if quanta: 

853 adapted_inputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

854 # Process init-inputs. 

855 input_keys: list[DatasetKey] = [] 

856 for read_edge in task_node.init.iter_all_inputs(): 

857 dataset_key = skeleton.add_dataset_node( 

858 read_edge.parent_dataset_type_name, self.empty_data_id 

859 ) 

860 skeleton.add_input_edge(task_init_key, dataset_key) 

861 if (ref := skeleton[dataset_key].get("ref")) is None: 

862 try: 

863 ref = self.existing_datasets.inputs[dataset_key] 

864 except KeyError: 

865 raise InitInputMissingError( 

866 f"Overall init-input dataset {read_edge.parent_dataset_type_name!r} " 

867 f"needed by task {task_node.label!r} not found in input collection(s) " 

868 f"{self.input_collections}." 

869 ) from None 

870 skeleton[dataset_key]["ref"] = ref 

871 for quantum_key in skeleton.get_quanta(task_node.label): 

872 skeleton.add_input_edge(quantum_key, dataset_key) 

873 input_keys.append(dataset_key) 

874 adapted_ref = read_edge.adapt_dataset_ref(ref) 

875 adapted_inputs[adapted_ref.datasetType] = adapted_ref 

876 # Save the quantum-adapted init inputs to each quantum, and add 

877 # skeleton edges connecting the init inputs to each quantum. 

878 for quantum_key in skeleton.get_quanta(task_node.label): 

879 skeleton[quantum_key]["init_inputs"] = adapted_inputs 

880 # Process init-outputs. 

881 adapted_outputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

882 for write_edge in task_node.init.iter_all_outputs(): 

883 dataset_key = skeleton.add_dataset_node( 

884 write_edge.parent_dataset_type_name, self.empty_data_id 

885 ) 

886 if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None: 

887 ref = DatasetRef( 

888 self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name].dataset_type, 

889 self.empty_data_id, 

890 run=self.output_run, 

891 ) 

892 skeleton[dataset_key]["ref"] = ref 

893 skeleton.add_output_edge(task_init_key, dataset_key) 

894 adapted_ref = write_edge.adapt_dataset_ref(ref) 

895 adapted_outputs[adapted_ref.datasetType] = adapted_ref 

896 skeleton[task_init_key]["inputs"] = adapted_inputs 

897 skeleton[task_init_key]["outputs"] = adapted_outputs 

898 elif has_skipped_quanta: 

899 # No quanta remain for this task, but at least one quantum was 

900 # skipped because its outputs were present in the skip_existing_in 

901 # collections. This means all init outputs should be present in 

902 # the skip_existing_in collections, too, and we need to put those 

903 # refs in the graph. 

904 for write_edge in task_node.init.iter_all_outputs(): 

905 dataset_key = skeleton.add_dataset_node( 

906 write_edge.parent_dataset_type_name, self.empty_data_id 

907 ) 

908 if (ref := self.existing_datasets.outputs_for_skip.get(dataset_key)) is None: 

909 raise InitInputMissingError( 

910 f"Init-output dataset {write_edge.parent_dataset_type_name!r} of skipped task " 

911 f"{task_node.label!r} not found in skip-existing-in collection(s) " 

912 f"{self.skip_existing_in}." 

913 ) 

914 skeleton[dataset_key]["ref"] = ref 

915 # If this dataset was "in the way" (i.e. already in the output 

916 # run), it isn't anymore. 

917 self.existing_datasets.outputs_in_the_way.pop(dataset_key, None) 

918 # No quanta remain in this task, but none were skipped; this means 

919 # they all got pruned because of NoWorkFound conditions. This 

920 # dooms all downstream quanta to the same fate, so we don't bother 

921 # doing anything with the task's init-outputs, since nothing is 

922 # going to consume them. 

923 

924 @final 

925 @timeMethod 

926 def _find_empty_dimension_datasets(self) -> None: 

927 """Query for all dataset types with no dimensions, updating 

928 `existing_datasets` in-place. 

929 

930 This includes but is not limited to init inputs and init outputs. 

931 """ 

932 _, dataset_type_nodes = self._pipeline_graph.group_by_dimensions()[self.universe.empty] 

933 dataset_types = [node.dataset_type for node in dataset_type_nodes.values()] 

934 dataset_types.extend(self._global_init_output_types.values()) 

935 for dataset_type in dataset_types: 

936 key = DatasetKey(dataset_type.name, self.empty_data_id.values_tuple()) 

937 if ( 

938 self._pipeline_graph.producer_of(dataset_type.name) is None 

939 and dataset_type.name not in self._global_init_output_types 

940 ): 

941 # Dataset type is an overall input; we always need to try to 

942 # find these. 

943 try: 

944 ref = self.butler.registry.findDataset( 

945 dataset_type.name, collections=self.input_collections 

946 ) 

947 except MissingDatasetTypeError: 

948 ref = None 

949 if ref is not None: 

950 self.existing_datasets.inputs[key] = ref 

951 elif self.skip_existing_in: 

952 # Dataset type is an intermediate or output; need to find these 

953 # but only if they're from previously executed quanta that we might 

954 # skip... 

955 try: 

956 ref = self.butler.registry.findDataset( 

957 dataset_type.name, collections=self.skip_existing_in 

958 ) 

959 except MissingDatasetTypeError: 

960 ref = None 

961 if ref is not None: 

962 self.existing_datasets.outputs_for_skip[key] = ref 

963 if ref.run == self.output_run: 

964 self.existing_datasets.outputs_in_the_way[key] = ref 

965 if self.output_run_exists and not self.skip_existing_starts_with_output_run: 

966 # ...or if they're in the way and would need to be clobbered 

967 # (and we haven't already found them in the previous block). 

968 try: 

969 ref = self.butler.registry.findDataset(dataset_type.name, collections=[self.output_run]) 

970 except MissingDatasetTypeError: 

971 ref = None 

972 if ref is not None: 

973 self.existing_datasets.outputs_in_the_way[key] = ref 

974 

975 @final 

976 @timeMethod 

977 def _attach_datastore_records(self, skeleton: QuantumGraphSkeleton) -> None: 

978 """Add datastore records for all overall inputs to a preliminary 

979 quantum graph. 

980 

981 Parameters 

982 ---------- 

983 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

984 Preliminary quantum graph to update in place. 

985 

986 Notes 

987 ----- 

988 On return, all quantum nodes in the skeleton graph will have a 

989 "datastore_records" attribute that is a mapping from datastore name 

990 to `lsst.daf.butler.DatastoreRecordData`, as used by 

991 `lsst.daf.butler.Quantum`. 

992 """ 

993 overall_inputs = skeleton.extract_overall_inputs() 

994 exported_records = self.butler._datastore.export_records(overall_inputs.values()) 

995 for quantum_key in skeleton.iter_all_quanta(): 

996 quantum_records = {} 

997 input_ids = { 

998 ref.id 

999 for dataset_key in skeleton.iter_inputs_of(quantum_key) 

1000 if (ref := overall_inputs.get(dataset_key)) is not None 

1001 } 

1002 if input_ids: 

1003 for datastore_name, records in exported_records.items(): 

1004 matching_records = records.subset(input_ids) 

1005 if matching_records is not None: 

1006 quantum_records[datastore_name] = matching_records 

1007 skeleton[quantum_key]["datastore_records"] = quantum_records 

1008 

1009 @final 

1010 @timeMethod 

1011 def _construct_quantum_graph( 

1012 self, skeleton: QuantumGraphSkeleton, metadata: Mapping[str, Any] 

1013 ) -> QuantumGraph: 

1014 """Construct a `QuantumGraph` object from the contents of a 

1015 fully-processed `quantum_graph_skeleton.QuantumGraphSkeleton`. 

1016 

1017 Parameters 

1018 ---------- 

1019 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

1020 Preliminary quantum graph. Must have "init_inputs", "inputs", and 

1021 "outputs" attributes on all quantum nodes, as added by 

1022 `_resolve_task_quanta`, as well as a "datastore_records" attribute 

1023 as added by `_attach_datastore_records`. 

1024 metadata : `Mapping` 

1025 Flexible metadata to add to the graph. 

1026 

1027 Returns 

1028 ------- 

1029 quantum_graph : `QuantumGraph` 

1030 DAG describing processing to be performed. 

1031 """ 

1032 quanta: dict[TaskDef, set[Quantum]] = {} 

1033 init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1034 init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1035 for task_def in self._pipeline_graph._iter_task_defs(): 

1036 if not skeleton.has_task(task_def.label): 

1037 continue 

1038 task_node = self._pipeline_graph.tasks[task_def.label] 

1039 task_init_key = skeleton.get_task_init_node(task_def.label) 

1040 init_inputs[task_def] = skeleton[task_init_key]["inputs"].values() 

1041 init_outputs[task_def] = skeleton[task_init_key]["outputs"].values() 

1042 quanta_for_task: set[Quantum] = set() 

1043 for quantum_key in skeleton.get_quanta(task_node.label): 

1044 node_state = skeleton[quantum_key] 

1045 quanta_for_task.add( 

1046 Quantum( 

1047 taskName=task_node.task_class_name, 

1048 taskClass=task_node.task_class, 

1049 dataId=node_state["data_id"], 

1050 initInputs=node_state["init_inputs"], 

1051 inputs=node_state["inputs"], 

1052 outputs=node_state["outputs"], 

1053 datastore_records=node_state.get("datastore_records"), 

1054 ) 

1055 ) 

1056 quanta[task_def] = quanta_for_task 

1057 

1058 registry_dataset_types: list[DatasetType] = [ 

1059 node.dataset_type for node in self._pipeline_graph.dataset_types.values() 

1060 ] 

1061 

1062 all_metadata = self.metadata.to_dict() 

1063 all_metadata.update(metadata) 

1064 return QuantumGraph( 

1065 quanta, 

1066 metadata=all_metadata, 

1067 universe=self.universe, 

1068 initInputs=init_inputs, 

1069 initOutputs=init_outputs, 

1070 globalInitOutputs=[skeleton[key]["ref"] for key in skeleton.global_init_outputs], 

1071 registryDatasetTypes=registry_dataset_types, 

1072 ) 

1073 

1074 @staticmethod 

1075 @final 

1076 def _find_removed( 

1077 original: Iterable[DatasetKey | PrerequisiteDatasetKey], 

1078 adjusted: NamedKeyMapping[DatasetType, Sequence[DatasetRef]], 

1079 ) -> set[DatasetKey | PrerequisiteDatasetKey]: 

1080 """Identify skeleton-graph dataset nodes that have been removed by 

1081 `~PipelineTaskConnections.adjustQuantum`. 

1082 

1083 Parameters 

1084 ---------- 

1085 original : `~collections.abc.Iterable` [ `DatasetKey` or \ 

1086 `PrerequisiteDatasetKey` ] 

1087 Identifiers for the dataset nodes that were the original neighbors 

1088 (inputs or outputs) of a quantum. 

1089 adjusted : `~lsst.daf.butler.NamedKeyMapping` [ \ 

1090 `~lsst.daf.butler.DatasetType`, \ 

1091 `~collections.abc.Sequence` [ `lsst.daf.butler.DatasetRef` ] ] 

1092 Adjusted neighbors, in the form used by `lsst.daf.butler.Quantum`. 

1093 

1094 Returns 

1095 ------- 

1096 removed : `set` [ `DatasetKey` or `PrerequisiteDatasetKey` ] 

1097 Datasets in ``original`` that have no counterpart in ``adjusted``. 

1098 """ 

1099 result = set(original) 

1100 for dataset_type, kept_refs in adjusted.items(): 

1101 parent_dataset_type_name, _ = DatasetType.splitDatasetTypeName(dataset_type.name) 

1102 for kept_ref in kept_refs: 

1103 result.remove(DatasetKey(parent_dataset_type_name, kept_ref.dataId.values_tuple())) 

1104 return result 

1105 
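
The set arithmetic above is straightforward; here is a self-contained analogue with plain tuples standing in for the butler types:

```python
def find_removed(original, kept):
    """Plain-Python analogue of _find_removed: original holds
    (dataset_type_name, data_id_values) keys, and kept maps dataset
    type name to the data-ID tuples retained by adjustQuantum."""
    result = set(original)
    for name, data_ids in kept.items():
        for data_id in data_ids:
            result.remove((name, data_id))
    return result


# The quantum originally had two outputs; adjustQuantum kept only one.
assert find_removed(
    {("calexp", (1, 2)), ("src", (1, 2))},
    {"calexp": [(1, 2)]},
) == {("src", (1, 2))}
```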

1106 

1107@dataclasses.dataclass(eq=False, order=False) 

1108class ExistingDatasets: 

1109 """Struct that holds the results of dataset queries for 

1110 `QuantumGraphBuilder`. 

1111 """ 

1112 

1113 inputs: dict[DatasetKey | PrerequisiteDatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1114 """Overall-input datasets found in `QuantumGraphBuilder.input_collections`. 

1115 

1116 This may include prerequisite inputs. It does include init-inputs. 

1117 It does not include intermediates. 

1118 """ 

1119 

1120 outputs_for_skip: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1121 """Output datasets found in `QuantumGraphBuilder.skip_existing_in`. 

1122 

1123 It is unspecified whether this includes init-outputs; there is 

1124 no concept of skipping at the init stage, so this is not expected to 

1125 matter. 

1126 """ 

1127 

1128 outputs_in_the_way: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1129 """Output datasets found in `QuantumGraphBuilder.output_run`. 

1130 

1131 This includes regular outputs and init-outputs. 

1132 """ 

1133 

1134 

1135def _quantum_or_quanta(n: int) -> str: 

1136 """Correctly pluralize 'quantum' if needed.""" 

1137 return f"{n} quanta" if n != 1 else "1 quantum"