Coverage for python/lsst/pipe/base/quantum_graph_builder.py: 25%


1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""The base class for the QuantumGraph-generation algorithm and various 

29helper classes. 

30""" 

31 

32from __future__ import annotations 

33 

34__all__ = ( 

35 "QuantumGraphBuilder", 

36 "ExistingDatasets", 

37 "QuantumGraphBuilderError", 

38 "OutputExistsError", 

39 "PrerequisiteMissingError", 

40) 

41 

42import dataclasses 

43from abc import ABC, abstractmethod 

44from collections.abc import Iterable, Mapping, Sequence 

45from typing import TYPE_CHECKING, Any, final 

46 

47from lsst.daf.butler import ( 

48 Butler, 

49 CollectionType, 

50 DataCoordinate, 

51 DatasetRef, 

52 DatasetType, 

53 DimensionUniverse, 

54 NamedKeyDict, 

55 NamedKeyMapping, 

56 Quantum, 

57) 

58from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError 

59from lsst.utils.logging import LsstLogAdapter, getLogger 

60from lsst.utils.timer import timeMethod 

61 

62from . import automatic_connection_constants as acc 

63from ._status import NoWorkFound 

64from ._task_metadata import TaskMetadata 

65from .connections import AdjustQuantumHelper 

66from .graph import QuantumGraph 

67from .pipeline_graph import PipelineGraph, TaskNode 

68from .prerequisite_helpers import PrerequisiteInfo, SkyPixBoundsBuilder, TimespanBuilder 

69from .quantum_graph_skeleton import ( 

70 DatasetKey, 

71 PrerequisiteDatasetKey, 

72 QuantumGraphSkeleton, 

73 QuantumKey, 

74 TaskInitKey, 

75) 

76 

77if TYPE_CHECKING: 

78 from .pipeline import TaskDef 

79 

80 

81class QuantumGraphBuilderError(Exception): 

82 """Base class for exceptions generated by QuantumGraphBuilder.""" 

83 

84 pass 

85 

86 

87class GraphBuilderError(QuantumGraphBuilderError): 

88 """Backwards-compatibility near-alias for QuantumGraphBuilderError.""" 

89 

90 pass 

91 

92 

93# Inherit from backwards-compatibility alias for backwards-compatibility. 

94class OutputExistsError(GraphBuilderError): 

95 """Exception generated when output datasets already exist.""" 

96 

97 pass 

98 

99 

100# Inherit from backwards-compatibility alias for backwards-compatibility. 

101class PrerequisiteMissingError(GraphBuilderError): 

102 """Exception generated when a prerequisite dataset does not exist.""" 

103 

104 pass 

105 

106 

107class InitInputMissingError(QuantumGraphBuilderError): 

108 """Exception generated when an init-input dataset does not exist.""" 

109 

110 pass 

111 

112 

113class QuantumGraphBuilder(ABC): 

114 """An abstract base class for building `QuantumGraph` objects from a 

115 pipeline. 

116 

117 Parameters 

118 ---------- 

119 pipeline_graph : `.pipeline_graph.PipelineGraph` 

120 Pipeline to build a `QuantumGraph` from, as a graph. Will be resolved 

121 in-place with the given butler (any existing resolution is ignored). 

122 butler : `lsst.daf.butler.Butler` 

123 Client for the data repository. Should be read-only. 

124 input_collections : `~collections.abc.Sequence` [ `str` ], optional 

125 Collections to search for overall-input datasets. If not provided, 

126 ``butler.collections`` is used (and must not be empty). 

127 output_run : `str`, optional 

128 Output `~lsst.daf.butler.CollectionType.RUN` collection. If not 

129 provided, ``butler.run`` is used (and must not be `None`). 

130 skip_existing_in : `~collections.abc.Sequence` [ `str` ], optional 

131 Collections to search for outputs that already exist for the purpose of 

132 skipping quanta that have already been run. 

133 clobber : `bool`, optional 

134 Whether to raise if predicted outputs already exist in ``output_run`` 

135 (not including those quanta that would be skipped because they've 

136 already been run). This never actually clobbers outputs; it just 

137 informs the graph generation algorithm whether execution will run with 

138 clobbering enabled. This is ignored if ``output_run`` does not exist. 

139 

140 Notes 

141 ----- 

142 Constructing a `QuantumGraphBuilder` will run queries for existing datasets 

143 with empty data IDs (including but not limited to init inputs and outputs), 

144 in addition to resolving the given pipeline graph and testing for existence 

145 of the ``output_run`` collection.

146 

147 The `build` method splits the pipeline graph into independent subgraphs, 

148 then calls the abstract method `process_subgraph` on each, to allow 

149 concrete implementations to populate the rough graph structure (the 

150 `~quantum_graph_skeleton.QuantumGraphSkeleton` class) and search for 

151 existing datasets (further populating the builder's `existing_datasets` 

152 struct). The `build` method then: 

153 

154 - assembles `lsst.daf.butler.Quantum` instances from all data IDs in the 

155 skeleton; 

156 - looks for existing outputs found in ``skip_existing_in`` to see if any 

157 quanta should be skipped; 

158 - calls `PipelineTaskConnections.adjustQuantum` on all quanta, adjusting 

159 downstream quanta appropriately when preliminary predicted outputs are 

160 rejected (pruning nodes that will not have the inputs they need to run); 

161 - attaches datastore records and registry dataset types to the graph. 

162 

163 In addition to implementing `process_subgraph`, derived classes are 

164 generally expected to add new construction keyword-only arguments to 

165 control the data IDs of the quantum graph, while forwarding all of the 

166 arguments defined in the base class to `super`. 
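
Examples
--------
A minimal sketch of such a subclass (the class name, the ``where``
argument, and the body of `process_subgraph` here are illustrative
assumptions, not part of this module)::

    from lsst.pipe.base.quantum_graph_builder import QuantumGraphBuilder
    from lsst.pipe.base.quantum_graph_skeleton import QuantumGraphSkeleton

    class MyQuantumGraphBuilder(QuantumGraphBuilder):
        def __init__(self, pipeline_graph, butler, *, where="", **kwargs):
            # Forward all base-class arguments to super().
            super().__init__(pipeline_graph, butler, **kwargs)
            # New keyword-only argument controlling quantum data IDs.
            self.where = where

        def process_subgraph(self, subgraph):
            skeleton = QuantumGraphSkeleton(subgraph.tasks)
            ...  # populate skeleton and self.existing_datasets
            return skeleton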

167 """ 

168 

169 def __init__( 

170 self, 

171 pipeline_graph: PipelineGraph, 

172 butler: Butler, 

173 *, 

174 input_collections: Sequence[str] | None = None, 

175 output_run: str | None = None, 

176 skip_existing_in: Sequence[str] = (), 

177 clobber: bool = False, 

178 ): 

179 self.log = getLogger(__name__) 

180 self.metadata = TaskMetadata() 

181 self._pipeline_graph = pipeline_graph 

182 self.butler = butler 

183 if input_collections is None: 

184 input_collections = butler.collections 

185 if not input_collections: 

186 raise ValueError("No input collections provided.") 

187 self.input_collections = input_collections 

188 if output_run is None: 

189 output_run = butler.run 

190 if not output_run: 

191 raise ValueError("No output RUN collection provided.") 

192 self.output_run = output_run 

193 self.skip_existing_in = skip_existing_in 

194 self.empty_data_id = DataCoordinate.make_empty(butler.dimensions) 

195 self.clobber = clobber 

196 # See whether the output run already exists. 

197 self.output_run_exists = False 

198 try: 

199 if self.butler.registry.getCollectionType(self.output_run) is not CollectionType.RUN: 

200 raise RuntimeError(f"{self.output_run!r} is not a RUN collection.") 

201 self.output_run_exists = True 

202 except MissingCollectionError: 

203 # If the run doesn't exist we never need to clobber. This is not 

204 # an error so you can run with clobber=True the first time you 

205 # attempt some processing as well as all subsequent times, instead 

206 # of forcing the user to make the first attempt different. 

207 self.clobber = False 

208 # We need to know whether the skip_existing_in collection sequence 

209 # starts with the output run collection, as an optimization to avoid 

210 # queries later. 

211 try: 

212 skip_existing_in_flat = self.butler.registry.queryCollections( 

213 self.skip_existing_in, flattenChains=True 

214 ) 

215 except MissingCollectionError: 

216 skip_existing_in_flat = [] 

217 if not skip_existing_in_flat: 

218 self.skip_existing_in = [] 

219 if self.skip_existing_in and self.output_run_exists: 

220 self.skip_existing_starts_with_output_run = self.output_run == skip_existing_in_flat[0] 

221 else: 

222 self.skip_existing_starts_with_output_run = False 

223 self.existing_datasets = ExistingDatasets() 

224 try: 

225 packages_storage_class = butler.get_dataset_type(acc.PACKAGES_INIT_OUTPUT_NAME).storageClass_name 

226 except MissingDatasetTypeError: 

227 packages_storage_class = acc.PACKAGES_INIT_OUTPUT_STORAGE_CLASS 

228 self._global_init_output_types = { 

229 acc.PACKAGES_INIT_OUTPUT_NAME: DatasetType( 

230 acc.PACKAGES_INIT_OUTPUT_NAME, 

231 self.universe.empty, 

232 packages_storage_class, 

233 ) 

234 } 

235 with self.butler.registry.caching_context(): 

236 self._pipeline_graph.resolve(self.butler.registry) 

237 self._find_empty_dimension_datasets() 

238 self.prerequisite_info = { 

239 task_node.label: PrerequisiteInfo(task_node, self._pipeline_graph) 

240 for task_node in pipeline_graph.tasks.values() 

241 } 

242 

243 log: LsstLogAdapter 

244 """Logger to use for all quantum-graph generation messages. 

245 

246 General and per-task status messages should be logged at `~logging.INFO` 

247 level or higher, per-dataset-type status messages should be logged at 

248 `~lsst.utils.logging.VERBOSE` or higher, and per-data-ID status messages 

249 should be logged at `~logging.DEBUG` or higher.

250 """ 

251 

252 metadata: TaskMetadata 

253 """Metadata to store in the QuantumGraph. 

254 

255 The `TaskMetadata` class is used here primarily in order to enable 

256 resource-usage collection with the `lsst.utils.timer.timeMethod` decorator. 

257 """ 

258 

259 butler: Butler 

260 """Client for the data repository. 

261 

262 Should be read-only. 

263 """ 

264 

265 input_collections: Sequence[str] 

266 """Collections to search for overall-input datasets. 

267 """ 

268 

269 output_run: str 

270 """Output `~lsst.daf.butler.CollectionType.RUN` collection. 

271 """ 

272 

273 skip_existing_in: Sequence[str] 

274 """Collections to search for outputs that already exist for the purpose 

275 of skipping quanta that have already been run. 

276 """ 

277 

278 clobber: bool 

279 """Whether to raise if predicted outputs already exist in ``output_run`` 

280 

281 This never actually clobbers outputs; it just informs the graph generation 

282 algorithm whether execution will run with clobbering enabled. This is 

283 always `False` if `output_run_exists` is `False`. 

284 """ 

285 

286 empty_data_id: DataCoordinate 

287 """An empty data ID in the data repository's dimension universe. 

288 """ 

289 

290 output_run_exists: bool 

291 """Whether the output run exists in the data repository already. 

292 """ 

293 

294 skip_existing_starts_with_output_run: bool 

295 """Whether the `skip_existing_in` sequence begins with `output_run`. 

296 

297 If this is true, any dataset found in `output_run` can be used to 

298 short-circuit queries in `skip_existing_in`. 

299 """ 

300 

301 existing_datasets: ExistingDatasets 

302 """Struct holding datasets that have already been found in the data 

303 repository. 

304 

305 This is updated in-place as the `QuantumGraph` generation algorithm 

306 proceeds. 

307 """ 

308 

309 prerequisite_info: Mapping[str, PrerequisiteInfo] 

310 """Helper objects for finding prerequisite inputs, organized by task label. 

311 

312 Subclasses that find prerequisites should remove the 

313 covered `~prerequisite_helpers.PrerequisiteFinder` objects from this 

314 attribute. 

315 """ 

316 

317 @property 

318 def universe(self) -> DimensionUniverse: 

319 """Definitions of all data dimensions.""" 

320 return self.butler.dimensions 

321 

322 @final 

323 @timeMethod 

324 def build(self, metadata: Mapping[str, Any] | None = None) -> QuantumGraph: 

325 """Build the quantum graph. 

326 

327 Parameters 

328 ---------- 

329 metadata : `~collections.abc.Mapping`, optional 

330 Flexible metadata to add to the quantum graph. 

331 

332 Returns 

333 ------- 

334 quantum_graph : `QuantumGraph` 

335 DAG describing processing to be performed. 

336 

337 Notes 

338 ----- 

339 External code is expected to construct a `QuantumGraphBuilder` and then 

340 call this method exactly once. See class documentation for details on 

341 what it does. 
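
Examples
--------
A sketch of the expected single-use pattern, assuming the hypothetical
``MyQuantumGraphBuilder`` subclass from the class documentation and an
existing data repository (the path and collection names are
placeholders)::

    from lsst.daf.butler import Butler

    butler = Butler("/path/to/repo")
    builder = MyQuantumGraphBuilder(
        pipeline_graph,
        butler,
        input_collections=["HSC/defaults"],
        output_run="u/someone/attempt1",
    )
    qg = builder.build(metadata={"comment": "example run"})
    qg.saveUri("pipeline.qgraph")  # persist for later execution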

342 """ 

343 with self.butler.registry.caching_context(): 

344 full_skeleton = QuantumGraphSkeleton(self._pipeline_graph.tasks) 

345 subgraphs = list(self._pipeline_graph.split_independent()) 

346 for i, subgraph in enumerate(subgraphs): 

347 self.log.info( 

348 "Processing pipeline subgraph %d of %d with %d task(s).", 

349 i + 1, 

350 len(subgraphs), 

351 len(subgraph.tasks), 

352 ) 

353 self.log.verbose("Subgraph tasks: [%s]", ", ".join(label for label in subgraph.tasks)) 

354 subgraph_skeleton = self.process_subgraph(subgraph) 

355 full_skeleton.update(subgraph_skeleton) 

356 # Loop over tasks. The pipeline graph must be topologically 

357 # sorted, so a quantum is only processed after any quantum that 

358 # provides its inputs has been processed. 

359 for task_node in self._pipeline_graph.tasks.values(): 

360 self._resolve_task_quanta(task_node, full_skeleton) 

361 # Add global init-outputs to the skeleton. 

362 for dataset_type in self._global_init_output_types.values(): 

363 dataset_key = full_skeleton.add_dataset_node( 

364 dataset_type.name, self.empty_data_id, is_global_init_output=True 

365 ) 

366 ref = self.existing_datasets.outputs_in_the_way.get(dataset_key) 

367 if ref is None: 

368 ref = DatasetRef(dataset_type, self.empty_data_id, run=self.output_run) 

369 full_skeleton[dataset_key]["ref"] = ref 

370 # Remove dataset nodes with no edges that are not global init

371 # outputs; these are generally overall-inputs whose consuming quanta

372 # ended up skipped or with no work to do (we can't remove them along

373 # with the quanta because no quantum knows whether it's the only

374 # consumer).

375 full_skeleton.remove_orphan_datasets() 

376 self._attach_datastore_records(full_skeleton) 

377 # TODO initialize most metadata here instead of in ctrl_mpexec. 

378 if metadata is None: 

379 metadata = {} 

380 return self._construct_quantum_graph(full_skeleton, metadata) 

381 

382 @abstractmethod 

383 def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton: 

384 """Build the rough structure for an independent subset of the 

385 `QuantumGraph` and query for relevant existing datasets. 

386 

387 Parameters 

388 ---------- 

389 subgraph : `.pipeline_graph.PipelineGraph` 

390 Subset of the pipeline graph that should be processed by this call. 

391 This is always resolved and topologically sorted. It should not be 

392 modified. 

393 

394 Returns 

395 ------- 

396 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

397 Class representing an initial quantum graph. See 

398 `quantum_graph_skeleton.QuantumGraphSkeleton` docs for details. 

399 After this is returned, the object may be modified in-place in 

400 unspecified ways. 

401 

402 Notes 

403 ----- 

404 In addition to returning a 

405 `quantum_graph_skeleton.QuantumGraphSkeleton`, this method should 

406 populate the `existing_datasets` structure by querying for all relevant 

407 datasets with non-empty data IDs (those with empty data IDs will 

408 already be present). In particular: 

409 

410 - `~ExistingDatasets.inputs` must always be populated with all 

411 overall-input datasets (but not prerequisites), by querying 

412 `input_collections`; 

413 - `~ExistingDatasets.outputs_for_skip` must be populated with any 

414 intermediate or output datasets present in `skip_existing_in` (it

415 can be ignored if `skip_existing_in` is empty); 

416 - `~ExistingDatasets.outputs_in_the_way` must be populated with any 

417 intermediate or output datasets present in `output_run`, if 

418 `output_run_exists` (it can be ignored if `output_run_exists` is 

419 `False`). Note that the presence of such datasets is not 

420 automatically an error, even if `clobber` is `False`, as these may be

421 outputs of quanta that will be skipped.

422 - `~ExistingDatasets.inputs` must be populated with all 

423 prerequisite-input datasets that were included in the skeleton, by 

424 querying `input_collections` (not all prerequisite inputs need to be 

425 included in the skeleton, but the base class can only use per-quantum 

426 queries to find them, and that can be slow when there are many 

427 quanta). 

428 

429 Dataset types should never be components and should always use the 

430 "common" storage class definition in `pipeline_graph.DatasetTypeNode` 

431 (which is the data repository definition when the dataset type is 

432 registered). 
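
Examples
--------
A very rough sketch of the expected population pattern; the query
helpers and the exact skeleton-mutation calls are assumptions for
illustration, and real implementations batch their data-ID and dataset
queries::

    def process_subgraph(self, subgraph):
        skeleton = QuantumGraphSkeleton(subgraph.tasks)
        for task_node in subgraph.tasks.values():
            # Hypothetical helper that yields quantum data IDs.
            for data_id in self._query_data_ids(task_node):
                skeleton.add_quantum_node(task_node.label, data_id)
        # Hypothetical helper that yields existing overall-input refs.
        for ref in self._query_existing_inputs(subgraph):
            key = DatasetKey(ref.datasetType.name, ref.dataId.required_values)
            self.existing_datasets.inputs[key] = ref
        return skeleton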

433 """ 

434 raise NotImplementedError() 

435 

436 @final 

437 @timeMethod 

438 def _resolve_task_quanta(self, task_node: TaskNode, skeleton: QuantumGraphSkeleton) -> None: 

439 """Process the quanta for one task in a skeleton graph to skip those 

440 that have already completed and adjust those that request it. 

441 

442 Parameters 

443 ---------- 

444 task_node : `pipeline_graph.TaskNode` 

445 Node for this task in the pipeline graph. 

446 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

447 Preliminary quantum graph, to be modified in-place. 

448 

449 Notes 

450 ----- 

451 This method modifies ``skeleton`` in-place in several ways: 

452 

453 - It adds a "ref" attribute to dataset nodes, using the contents of 

454 `existing_datasets`. This ensures producing and consuming tasks 

455 start from the same `DatasetRef`. 

456 - It adds "inputs", "outputs", and "init_inputs" attributes to the 

457 quantum nodes, holding the same `NamedKeyDict` objects needed to

458 construct actual `Quantum` instances.

459 - It removes quantum nodes that are to be skipped because their outputs 

460 already exist in `skip_existing_in`. It also removes their outputs 

461 from `ExistingDatasets.outputs_in_the_way`. 

462 - It adds prerequisite dataset nodes and edges that connect them to the 

463 quanta that consume them. 

464 - It removes quantum nodes whose 

465 `~PipelineTaskConnections.adjustQuantum` calls raise `NoWorkFound` or 

466 predict no outputs; 

467 - It removes the nodes of output datasets that are "adjusted away". 

468 - It removes the edges of input datasets that are "adjusted away". 

469 

470 The difference between how adjusted inputs and outputs are handled 

471 reflects the fact that many quanta can share the same input, but only 

472 one produces each output. This can lead to the graph having 

473 superfluous isolated nodes after processing is complete, but these 

474 should only be removed after all the quanta from all tasks have been 

475 processed. 
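
Examples
--------
The `NoWorkFound` pruning described above originates in the task's
connections class; a sketch of an ``adjustQuantum`` override that can
trigger it (the connection definition and threshold are illustrative)::

    import lsst.pipe.base.connectionTypes as cT
    from lsst.pipe.base import NoWorkFound, PipelineTaskConnections

    class ExampleConnections(
        PipelineTaskConnections, dimensions=("visit", "detector")
    ):
        images = cT.Input(
            doc="Example multiple-dataset input.",
            name="example_input",
            storageClass="ExposureF",
            dimensions=("visit", "detector"),
            multiple=True,
        )

        def adjustQuantum(self, inputs, outputs, label, data_id):
            # ``inputs`` maps connection name -> (connection, [DatasetRef]).
            _, refs = inputs["images"]
            if len(refs) < 2:  # illustrative threshold
                raise NoWorkFound("Need at least two input datasets.")
            return super().adjustQuantum(inputs, outputs, label, data_id)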

476 """ 

477 # Extract the helper object for the prerequisite inputs of this task, 

478 # and tell it to prepare to construct skypix bounds and timespans for 

479 # each quantum (these will automatically do nothing if nothing needs 

480 # those bounds). 

481 task_prerequisite_info = self.prerequisite_info[task_node.label] 

482 task_prerequisite_info.update_bounds() 

483 # Loop over all quanta for this task, remembering the ones we've 

484 # gotten rid of. 

485 skipped_quanta = [] 

486 no_work_quanta = [] 

487 for quantum_key in skeleton.get_quanta(task_node.label): 

488 if self._skip_quantum_if_metadata_exists(task_node, quantum_key, skeleton): 

489 skipped_quanta.append(quantum_key) 

490 continue 

491 quantum_data_id = skeleton[quantum_key]["data_id"] 

492 skypix_bounds_builder = task_prerequisite_info.bounds.make_skypix_bounds_builder(quantum_data_id) 

493 timespan_builder = task_prerequisite_info.bounds.make_timespan_builder(quantum_data_id) 

494 adjusted_outputs = self._gather_quantum_outputs( 

495 task_node, quantum_key, skeleton, skypix_bounds_builder, timespan_builder 

496 ) 

497 adjusted_inputs = self._gather_quantum_inputs( 

498 task_node, 

499 quantum_key, 

500 skeleton, 

501 task_prerequisite_info, 

502 skypix_bounds_builder, 

503 timespan_builder, 

504 ) 

505 # Give the task's Connections class an opportunity to remove 

506 # some inputs, or complain if they are unacceptable. This will 

507 # raise if one of the check conditions is not met, which is the 

508 # intended behavior. 

509 helper = AdjustQuantumHelper(inputs=adjusted_inputs, outputs=adjusted_outputs) 

510 try: 

511 helper.adjust_in_place( 

512 task_node._get_imported_data().connections, task_node.label, quantum_data_id 

513 ) 

514 except NoWorkFound as err: 

515 # Do not generate this quantum; it would not produce any 

516 # outputs. Remove it and all of the outputs it might have 

517 # produced from the skeleton. 

518 try: 

519 _, connection_name, _ = err.args 

520 details = f"not enough datasets for connection {connection_name}." 

521 except ValueError: 

522 details = str(err) 

523 self.log.debug( 

524 "No work found for quantum %s of task %s: %s", 

525 quantum_key.data_id_values, 

526 quantum_key.task_label, 

527 details, 

528 ) 

529 no_work_quanta.append(quantum_key) 

530 continue 

531 if helper.outputs_adjusted: 

532 if not any(adjusted_refs for adjusted_refs in helper.outputs.values()): 

533 # No outputs also means we don't generate this quantum. 

534 self.log.debug( 

535 "No outputs predicted for quantum %s of task %s.", 

536 quantum_key.data_id_values, 

537 quantum_key.task_label, 

538 ) 

539 no_work_quanta.append(quantum_key) 

540 continue 

541 # Remove output nodes that were not retained by 

542 # adjustQuantum. 

543 skeleton.remove_dataset_nodes( 

544 self._find_removed(skeleton.iter_outputs_of(quantum_key), helper.outputs) 

545 ) 

546 if helper.inputs_adjusted: 

547 if not any(bool(adjusted_refs) for adjusted_refs in helper.inputs.values()): 

548 raise QuantumGraphBuilderError( 

549 f"adjustQuantum implementation for {task_node.label}@{quantum_key.data_id_values} " 

550 "returned outputs but no inputs." 

551 ) 

552 # Remove input dataset edges that were not retained by 

553 # adjustQuantum. We can't remove the input dataset nodes 

554 # because some other quantum might still want them. 

555 skeleton.remove_input_edges( 

556 quantum_key, self._find_removed(skeleton.iter_inputs_of(quantum_key), helper.inputs) 

557 ) 

558 # Save the adjusted inputs and outputs to the quantum node's 

559 # state so we don't have to regenerate those data structures 

560 # from the graph. 

561 skeleton[quantum_key]["inputs"] = helper.inputs 

562 skeleton[quantum_key]["outputs"] = helper.outputs 

563 for no_work_quantum in no_work_quanta: 

564 skeleton.remove_quantum_node(no_work_quantum, remove_outputs=True) 

565 for skipped_quantum in skipped_quanta: 

566 skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False) 

567 remaining_quanta = skeleton.get_quanta(task_node.label) 

568 self._resolve_task_init(task_node, skeleton, bool(skipped_quanta)) 

569 message_terms = [] 

570 if no_work_quanta: 

571 message_terms.append(f"{len(no_work_quanta)} had no work to do") 

572 if skipped_quanta: 

573 message_terms.append(f"{len(skipped_quanta)} previously succeeded") 

574 message_parenthetical = f" ({', '.join(message_terms)})" if message_terms else "" 

575 if remaining_quanta: 

576 self.log.info( 

577 "Generated %s for task %s%s.", 

578 _quantum_or_quanta(len(remaining_quanta)), 

579 task_node.label, 

580 message_parenthetical, 

581 ) 

582 else: 

583 self.log.info( 

584 "Dropping task %s because no quanta remain%s.", task_node.label, message_parenthetical 

585 ) 

586 skeleton.remove_task(task_node.label) 

587 

588 def _skip_quantum_if_metadata_exists( 

589 self, task_node: TaskNode, quantum_key: QuantumKey, skeleton: QuantumGraphSkeleton 

590 ) -> bool: 

591 """Identify and drop quanta that should be skipped because their 

592 metadata datasets already exist. 

593 

594 Parameters 

595 ---------- 

596 task_node : `pipeline_graph.TaskNode` 

597 Node for this task in the pipeline graph. 

598 quantum_key : `QuantumKey` 

599 Identifier for this quantum in the graph. 

600 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

601 Preliminary quantum graph, to be modified in-place. 

602 

603 Returns 

604 ------- 

605 skipped : `bool` 

606 `True` if the quantum is being skipped and has been removed from 

607 the graph, `False` otherwise. 

608 

609 Notes 

610 ----- 

611 If the metadata dataset for this quantum exists in 

612 `ExistingDatasets.outputs_for_skip`, the quantum will be skipped. This 

613 causes the quantum node to be removed from the graph. Dataset nodes 

614 that were previously the outputs of this quantum will have their "ref" 

615 attribute set from `ExistingDatasets.outputs_for_skip`, or will be 

616 removed if there is no such dataset there. Any output dataset in 

617 `ExistingDatasets.outputs_in_the_way` will be removed. 
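
Examples
--------
In sketch form, the check pairs the task's metadata dataset type name
with the quantum's data ID values (the dataset type name here is
illustrative)::

    key = DatasetKey("someTask_metadata", quantum_key.data_id_values)
    should_skip = key in self.existing_datasets.outputs_for_skip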

618 """ 

619 metadata_dataset_key = DatasetKey( 

620 task_node.metadata_output.parent_dataset_type_name, quantum_key.data_id_values 

621 ) 

622 if metadata_dataset_key in self.existing_datasets.outputs_for_skip: 

623 # This quantum's metadata is already present in the

624 # skip_existing_in collections; we'll skip it. But the presence of 

625 # the metadata dataset doesn't guarantee that all of the other 

626 # outputs we predicted are present; we have to check. 

627 for output_dataset_key in list(skeleton.iter_outputs_of(quantum_key)): 

628 if ( 

629 output_ref := self.existing_datasets.outputs_for_skip.get(output_dataset_key) 

630 ) is not None: 

631 # Populate the skeleton graph's node attributes 

632 # with the existing DatasetRef, just like a 

633 # predicted output of a non-skipped quantum. 

634 skeleton[output_dataset_key]["ref"] = output_ref 

635 else: 

636 # Remove this dataset from the skeleton graph, 

637 # because the quantum that would have produced it 

638 # is being skipped and it doesn't already exist. 

639 skeleton.remove_dataset_nodes([output_dataset_key]) 

640 # If this dataset was "in the way" (i.e. already in the 

641 # output run), it isn't anymore. 

642 self.existing_datasets.outputs_in_the_way.pop(output_dataset_key, None) 

643 # Removing the quantum node from the graph will happen outside this 

644 # function. 

645 return True 

646 return False 

647 

648 @final 

649 def _gather_quantum_outputs( 

650 self, 

651 task_node: TaskNode, 

652 quantum_key: QuantumKey, 

653 skeleton: QuantumGraphSkeleton, 

654 skypix_bounds_builder: SkyPixBoundsBuilder, 

655 timespan_builder: TimespanBuilder, 

656 ) -> NamedKeyDict[DatasetType, list[DatasetRef]]: 

657 """Collect outputs or generate datasets for a preliminary quantum and 

658 put them in the form used by `~lsst.daf.butler.Quantum` and 

659 `~PipelineTaskConnections.adjustQuantum`. 

660 

661 Parameters 

662 ---------- 

663 task_node : `pipeline_graph.TaskNode` 

664 Node for this task in the pipeline graph. 

665 quantum_key : `QuantumKey` 

666 Identifier for this quantum in the graph. 

667 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

668 Preliminary quantum graph, to be modified in-place. 

669 skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder` 

670 An object that accumulates the appropriate spatial bounds for a 

671 quantum. 

672 timespan_builder : `~prerequisite_helpers.TimespanBuilder` 

673 An object that accumulates the appropriate timespan for a quantum. 

674 

675 Returns 

676 ------- 

677 outputs : `~lsst.daf.butler.NamedKeyDict` [ \ 

678 `~lsst.daf.butler.DatasetType`, `list` [ \ 

679 `~lsst.daf.butler.DatasetRef` ] ] 

680 All outputs to the task, using the storage class and components 

681 defined by the task's own connections. 

682 

683 Notes 

684 ----- 

685 This first looks for outputs already present in the `output_run` by 

686 looking in `ExistingDatasets.outputs_in_the_way`; if it finds something 

687 and `clobber` is `True`, it uses that ref (it's not ideal that both the 

688 original dataset and its replacement will have the same UUID, but we 

689 don't have space in the quantum graph for two UUIDs, and we need the 

690 datastore records of the original there). If `clobber` is `False`, 

691 `RuntimeError` is raised. If there is no output already present, a new 

692 one with a random UUID is generated. In all cases the "ref" attribute 

693 of the dataset node in the skeleton is set. 

694 """ 

695 outputs_by_type: dict[str, list[DatasetRef]] = {} 

696 dataset_key: DatasetKey 

697 for dataset_key in skeleton.iter_outputs_of(quantum_key): 

698 dataset_data_id = skeleton[dataset_key]["data_id"] 

699 dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name] 

700 if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None: 

701 ref = DatasetRef(dataset_type_node.dataset_type, dataset_data_id, run=self.output_run) 

702 elif not self.clobber: 

703 # We intentionally raise here, before running adjustQuantum, 

704 # because it'd be weird if we left an old potential output of a 

705 # task sitting there in the output collection, just because the 

706 # task happened to not actually produce it. 

707 raise OutputExistsError( 

708 f"Potential output dataset {ref} already exists in the output run " 

709 f"{self.output_run}, but clobbering outputs was not expected to be necessary." 

710 ) 

711 skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id) 

712 timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id) 

713 skeleton[dataset_key]["ref"] = ref 

714 outputs_by_type.setdefault(dataset_key.parent_dataset_type_name, []).append(ref) 

715 adapted_outputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict() 

716 for write_edge in task_node.iter_all_outputs(): 

717 dataset_type_node = self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name] 

718 edge_dataset_type = write_edge.adapt_dataset_type(dataset_type_node.dataset_type) 

719 adapted_outputs[edge_dataset_type] = [ 

720 write_edge.adapt_dataset_ref(ref) 

721 for ref in sorted(outputs_by_type.get(write_edge.parent_dataset_type_name, [])) 

722 ] 

723 return adapted_outputs 

724 

725 @final 

726 def _gather_quantum_inputs( 

727 self, 

728 task_node: TaskNode, 

729 quantum_key: QuantumKey, 

730 skeleton: QuantumGraphSkeleton, 

731 task_prerequisite_info: PrerequisiteInfo, 

732 skypix_bounds_builder: SkyPixBoundsBuilder, 

733 timespan_builder: TimespanBuilder, 

734 ) -> NamedKeyDict[DatasetType, list[DatasetRef]]: 

735 """Collect input datasets for a preliminary quantum and put them in the 

736 form used by `~lsst.daf.butler.Quantum` and 

737 `~PipelineTaskConnections.adjustQuantum`. 

738 

739 Parameters 

740 ---------- 

741 task_node : `pipeline_graph.TaskNode` 

742 Node for this task in the pipeline graph. 

743 quantum_key : `QuantumKey` 

744 Identifier for this quantum in the graph. 

745 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

746 Preliminary quantum graph, to be modified in-place.
task_prerequisite_info : `~prerequisite_helpers.PrerequisiteInfo`
    Helper object holding the prerequisite-input finders and bounds
    machinery for this task.

747 skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder` 

748 An object that accumulates the appropriate spatial bounds for a 

749 quantum. 

750 timespan_builder : `~prerequisite_helpers.TimespanBuilder` 

751 An object that accumulates the appropriate timespan for a quantum. 

752 

753 Returns 

754 ------- 

755 inputs : `~lsst.daf.butler.NamedKeyDict` [ \ 

756 `~lsst.daf.butler.DatasetType`, `list` [ \ 

757 `~lsst.daf.butler.DatasetRef` ] ] 

758 All regular and prerequisite inputs to the task, using the storage 

759 class and components defined by the task's own connections. 

760 

761 Notes 

762 ----- 

763 On return, the dataset nodes that represent inputs to this quantum will 

764 either have their "ref" attribute set (using the common dataset type, 

765 not the task-specific one) or will be removed from the graph. 

766 

767 For regular inputs, usually an existing "ref" (corresponding to an 

768 output of another quantum) will be found and left unchanged. When 

769 there is no existing "ref" attribute, `ExistingDatasets.inputs` is 

770 searched next; if there is nothing there, the input will be removed. 

771 

772 Prerequisite inputs are always queried for directly here (delegating to 

773 `_find_prerequisite_inputs`). They are never produced by other tasks, 

774 and cannot in general be queried for in advance when 

775 `ExistingDatasets.inputs` is populated. 
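
Examples
--------
The resolution order for a regular input, in sketch form (this mirrors
the implementation below)::

    ref = skeleton[dataset_key].get("ref")  # produced by another quantum?
    if ref is None:
        # Fall back to overall inputs found by process_subgraph.
        ref = self.existing_datasets.inputs.get(dataset_key)
    if ref is None:
        # Nothing to consume; drop the node for all consumers.
        skeleton.remove_dataset_nodes([dataset_key])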

776 """ 

777 quantum_data_id = skeleton[quantum_key]["data_id"] 

778 inputs_by_type: dict[str, set[DatasetRef]] = {} 

779 dataset_key: DatasetKey | PrerequisiteDatasetKey 

780 # Process inputs already present in the skeleton - this should include 

781 # all regular inputs (including intermediates) and may include some 

782 # prerequisites. 

783 for dataset_key in list(skeleton.iter_inputs_of(quantum_key)): 

784 if (ref := skeleton[dataset_key].get("ref")) is None: 

785 # This dataset is an overall input - if it was an intermediate, 

786 # we would have already either removed the node or set the 

787 # "ref" attribute when processing its producing quantum - and 

788 # this is the first time we're trying to resolve it. 

789 if (ref := self.existing_datasets.inputs.get(dataset_key)) is None: 

790 # It also doesn't exist in the input collections, so we 

791 # remove its node in the skeleton graph (so other consumers 

792 # won't have to check for it). 

793 skeleton.remove_dataset_nodes([dataset_key]) 

794 continue 

795 skeleton[dataset_key]["ref"] = ref 

796 inputs_by_type.setdefault(dataset_key.parent_dataset_type_name, set()).add(ref) 

797 skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId) 

798 timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId) 

799 # Query for any prerequisites not handled by process_subgraph. Note 

800 # that these were not already in the skeleton graph, so we add them 

801 # now. 

802 skypix_bounds = skypix_bounds_builder.finish() 

803 timespan = timespan_builder.finish() 

804 for finder in task_prerequisite_info.finders.values(): 

805 inputs_for_type = inputs_by_type.setdefault(finder.dataset_type_node.name, set()) 

806 dataset_keys = [] 

807 for ref in finder.find( 

808 self.butler, self.input_collections, quantum_data_id, skypix_bounds, timespan 

809 ): 

810 dataset_key = skeleton.add_prerequisite_node(ref.datasetType.name, ref=ref) 

811 dataset_keys.append(dataset_key) 

812 inputs_for_type.add(ref) 

813 skeleton.add_input_edges(quantum_key, dataset_keys) 

814 adapted_inputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict() 

815 for read_edge in task_node.iter_all_inputs(): 

816 dataset_type_node = self._pipeline_graph.dataset_types[read_edge.parent_dataset_type_name] 

817 edge_dataset_type = read_edge.adapt_dataset_type(dataset_type_node.dataset_type) 

818 if (current_dataset_type := adapted_inputs.keys().get(edge_dataset_type.name)) is None: 

819 adapted_inputs[edge_dataset_type] = [ 

820 read_edge.adapt_dataset_ref(ref) 

821 for ref in sorted(inputs_by_type.get(read_edge.parent_dataset_type_name, frozenset())) 

822 ] 

823 elif current_dataset_type != edge_dataset_type: 

824 raise NotImplementedError( 

825 f"Task {task_node.label!r} has {edge_dataset_type.name!r} as an input via " 

826 "two different connections, with two different storage class overrides. " 

827 "This is not yet supported due to limitations in the Quantum data structure." 

828 ) 

829 # If neither the `if` nor the `elif` above match, it means 

830 # multiple input connections have exactly the same dataset 

831 # type, and hence nothing to do after the first one. 

832 return adapted_inputs 

833 

834 @final 

835 def _resolve_task_init( 

836 self, task_node: TaskNode, skeleton: QuantumGraphSkeleton, has_skipped_quanta: bool 

837 ) -> None: 

838 """Add init-input and init-output dataset nodes and edges for a task to 

839 the skeleton. 

840 

841 Parameters 

842 ---------- 

843 task_node : `pipeline_graph.TaskNode` 

844 Pipeline graph description of the task. 

845 skeleton : `QuantumGraphSkeleton` 

846 In-progress quantum graph data structure to update in-place. 

847 has_skipped_quanta : `bool` 

848 Whether any of this task's quanta were skipped because they had 

849 already succeeded. 

850 """ 

851 quanta = skeleton.get_quanta(task_node.label) 

852 task_init_key = TaskInitKey(task_node.label) 

853 if quanta: 

854 adapted_inputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

855 # Process init-inputs. 

856 input_keys: list[DatasetKey] = [] 

857 for read_edge in task_node.init.iter_all_inputs(): 

858 dataset_key = skeleton.add_dataset_node( 

859 read_edge.parent_dataset_type_name, self.empty_data_id 

860 ) 

861 skeleton.add_input_edge(task_init_key, dataset_key) 

862 if (ref := skeleton[dataset_key].get("ref")) is None: 

863 try: 

864 ref = self.existing_datasets.inputs[dataset_key] 

865 except KeyError: 

866 raise InitInputMissingError( 

867 f"Overall init-input dataset {read_edge.parent_dataset_type_name!r} " 

868 f"needed by task {task_node.label!r} not found in input collection(s) " 

869 f"{self.input_collections}." 

870 ) from None 

871 skeleton[dataset_key]["ref"] = ref 

872 for quantum_key in skeleton.get_quanta(task_node.label): 

873 skeleton.add_input_edge(quantum_key, dataset_key) 

874 input_keys.append(dataset_key) 

875 adapted_ref = read_edge.adapt_dataset_ref(ref) 

876 adapted_inputs[adapted_ref.datasetType] = adapted_ref 

877 # Save the adapted init inputs to each quantum node's state (the

878 # skeleton edges connecting them were added in the loop above).

879 for quantum_key in skeleton.get_quanta(task_node.label): 

880 skeleton[quantum_key]["init_inputs"] = adapted_inputs 

881 # Process init-outputs. 

882 adapted_outputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

883 for write_edge in task_node.init.iter_all_outputs(): 

884 dataset_key = skeleton.add_dataset_node( 

885 write_edge.parent_dataset_type_name, self.empty_data_id 

886 ) 

887 if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None: 

888 ref = DatasetRef( 

889 self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name].dataset_type, 

890 self.empty_data_id, 

891 run=self.output_run, 

892 ) 

893 skeleton[dataset_key]["ref"] = ref 

894 skeleton.add_output_edge(task_init_key, dataset_key) 

895 adapted_ref = write_edge.adapt_dataset_ref(ref) 

896 adapted_outputs[adapted_ref.datasetType] = adapted_ref 

897 skeleton[task_init_key]["inputs"] = adapted_inputs 

898 skeleton[task_init_key]["outputs"] = adapted_outputs 

899 elif has_skipped_quanta: 

900 # No quanta remain for this task, but at least one quantum was 

901 # skipped because its outputs were present in the skip_existing_in 

902 # collections. This means all init outputs should be present in 

903 # the skip_existing_in collections, too, and we need to put those 

904 # refs in the graph. 

905 for write_edge in task_node.init.iter_all_outputs(): 

906 dataset_key = skeleton.add_dataset_node( 

907 write_edge.parent_dataset_type_name, self.empty_data_id 

908 ) 

909 if (ref := self.existing_datasets.outputs_for_skip.get(dataset_key)) is None: 

910 raise InitInputMissingError( 

911 f"Init-output dataset {write_edge.parent_dataset_type_name!r} of skipped task " 

912 f"{task_node.label!r} not found in skip-existing-in collection(s) " 

913 f"{self.skip_existing_in}." 

914 ) from None 

915 skeleton[dataset_key]["ref"] = ref 

916 # If this dataset was "in the way" (i.e. already in the output 

917 # run), it isn't anymore. 

918 self.existing_datasets.outputs_in_the_way.pop(dataset_key, None) 

919 # Otherwise, no quanta remain for this task and none were skipped;

920 # this means they all got pruned by NoWorkFound conditions. This

921 # dooms all downstream quanta to the same fate, so we don't bother 

922 # doing anything with the task's init-outputs, since nothing is 

923 # going to consume them. 

924 

925 @final 

926 @timeMethod 

927 def _find_empty_dimension_datasets(self) -> None: 

928 """Query for all dataset types with no dimensions, updating 

929 `existing_datasets` in-place. 

930 

931 This includes but is not limited to init inputs and init outputs. 

932 """ 

933 _, dataset_type_nodes = self._pipeline_graph.group_by_dimensions()[self.universe.empty.as_group()] 

934 dataset_types = [node.dataset_type for node in dataset_type_nodes.values()] 

935 dataset_types.extend(self._global_init_output_types.values()) 

936 for dataset_type in dataset_types: 

937 key = DatasetKey(dataset_type.name, self.empty_data_id.required_values) 

938 if ( 

939 self._pipeline_graph.producer_of(dataset_type.name) is None 

940 and dataset_type.name not in self._global_init_output_types 

941 ): 

942 # Dataset type is an overall input; we always need to try to 

943 # find these. 

944 try: 

945 ref = self.butler.find_dataset(dataset_type.name, collections=self.input_collections) 

946 except MissingDatasetTypeError: 

947 ref = None 

948 if ref is not None: 

949 self.existing_datasets.inputs[key] = ref 

950 elif self.skip_existing_in: 

951 # Dataset type is an intermediate or output; need to find these 

952 # only if they're from previously executed quanta that we might

953 # skip... 

954 try: 

955 ref = self.butler.find_dataset(dataset_type.name, collections=self.skip_existing_in) 

956 except MissingDatasetTypeError: 

957 ref = None 

958 if ref is not None: 

959 self.existing_datasets.outputs_for_skip[key] = ref 

960 if ref.run == self.output_run: 

961 self.existing_datasets.outputs_in_the_way[key] = ref 

962 if self.output_run_exists and not self.skip_existing_starts_with_output_run: 

963 # ...or if they're in the way and would need to be clobbered 

964 # (and we haven't already found them in the previous block). 

965 try: 

966 ref = self.butler.find_dataset(dataset_type.name, collections=[self.output_run]) 

967 except MissingDatasetTypeError: 

968 ref = None 

969 if ref is not None: 

970 self.existing_datasets.outputs_in_the_way[key] = ref 

971 

972 @final 

973 @timeMethod 

974 def _attach_datastore_records(self, skeleton: QuantumGraphSkeleton) -> None: 

975 """Add datastore records for all overall inputs to a preliminary 

976 quantum graph. 

977 

978 Parameters 

979 ---------- 

980 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

981 Preliminary quantum graph to update in place. 

982 

983 Notes 

984 ----- 

985 On return, all quantum nodes in the skeleton graph will have a 

986 "datastore_records" attribute that is a mapping from datastore name 

987 to `lsst.daf.butler.DatastoreRecordData`, as used by 

988 `lsst.daf.butler.Quantum`. 
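
Examples
--------
A sketch of reading the attached records back off a quantum node (the
datastore name shown is illustrative)::

    records = skeleton[quantum_key]["datastore_records"]
    # e.g. {"FileDatastore@<butlerRoot>": DatastoreRecordData(...)}
    for datastore_name, record_data in records.items():
        ...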

989 """ 

990 overall_inputs = skeleton.extract_overall_inputs() 

991 exported_records = self.butler._datastore.export_records(overall_inputs.values()) 

992 for quantum_key in skeleton.iter_all_quanta(): 

993 quantum_records = {} 

994 input_ids = { 

995 ref.id 

996 for dataset_key in skeleton.iter_inputs_of(quantum_key) 

997 if (ref := overall_inputs.get(dataset_key)) is not None 

998 } 

999 if input_ids: 

1000 for datastore_name, records in exported_records.items(): 

1001 matching_records = records.subset(input_ids) 

1002 if matching_records is not None: 

1003 quantum_records[datastore_name] = matching_records 

1004 skeleton[quantum_key]["datastore_records"] = quantum_records 

1005 

1006 @final 

1007 @timeMethod 

1008 def _construct_quantum_graph( 

1009 self, skeleton: QuantumGraphSkeleton, metadata: Mapping[str, Any] 

1010 ) -> QuantumGraph: 

1011 """Construct a `QuantumGraph` object from the contents of a 

1012 fully-processed `quantum_graph_skeleton.QuantumGraphSkeleton`. 

1013 

1014 Parameters 

1015 ---------- 

1016 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

1017 Preliminary quantum graph. Must have "init_inputs", "inputs", and 

1018 "outputs" attributes on all quantum nodes, as added by 

1019 `_resolve_task_quanta`, as well as a "datastore_records" attribute 

1020 as added by `_attach_datastore_records`. 

1021 metadata : `Mapping` 

1022 Flexible metadata to add to the graph. 

1023 

1024 Returns 

1025 ------- 

1026 quantum_graph : `QuantumGraph` 

1027 DAG describing processing to be performed. 

1028 """ 

1029 quanta: dict[TaskDef, set[Quantum]] = {} 

1030 init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1031 init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1032 for task_def in self._pipeline_graph._iter_task_defs(): 

1033 if not skeleton.has_task(task_def.label): 

1034 continue 

1035 task_node = self._pipeline_graph.tasks[task_def.label] 

1036 task_init_key = skeleton.get_task_init_node(task_def.label) 

1037 init_inputs[task_def] = skeleton[task_init_key]["inputs"].values() 

1038 init_outputs[task_def] = skeleton[task_init_key]["outputs"].values() 

1039 quanta_for_task: set[Quantum] = set() 

1040 for quantum_key in skeleton.get_quanta(task_node.label): 

1041 node_state = skeleton[quantum_key] 

1042 quanta_for_task.add( 

1043 Quantum( 

1044 taskName=task_node.task_class_name, 

1045 taskClass=task_node.task_class, 

1046 dataId=node_state["data_id"], 

1047 initInputs=node_state["init_inputs"], 

1048 inputs=node_state["inputs"], 

1049 outputs=node_state["outputs"], 

1050 datastore_records=node_state.get("datastore_records"), 

1051 ) 

1052 ) 

1053 quanta[task_def] = quanta_for_task 

1054 

1055 registry_dataset_types: list[DatasetType] = [ 

1056 node.dataset_type for node in self._pipeline_graph.dataset_types.values() 

1057 ] 

1058 

1059 all_metadata = self.metadata.to_dict() 

1060 all_metadata.update(metadata) 

1061 return QuantumGraph( 

1062 quanta, 

1063 metadata=all_metadata, 

1064 universe=self.universe, 

1065 initInputs=init_inputs, 

1066 initOutputs=init_outputs, 

1067 globalInitOutputs=[skeleton[key]["ref"] for key in skeleton.global_init_outputs], 

1068 registryDatasetTypes=registry_dataset_types, 

1069 ) 

1070 

1071 @staticmethod 

1072 @final 

1073 def _find_removed( 

1074 original: Iterable[DatasetKey | PrerequisiteDatasetKey], 

1075 adjusted: NamedKeyMapping[DatasetType, Sequence[DatasetRef]], 

1076 ) -> set[DatasetKey | PrerequisiteDatasetKey]: 

1077 """Identify skeleton-graph dataset nodes that have been removed by 

1078 `~PipelineTaskConnections.adjustQuantum`. 

1079 

1080 Parameters 

1081 ---------- 

1082 original : `~collections.abc.Iterable` [ `DatasetKey` or \ 

1083 `PrerequisiteDatasetKey` ] 

1084 Identifiers for the dataset nodes that were the original neighbors 

1085 (inputs or outputs) of a quantum. 

1086 adjusted : `~lsst.daf.butler.NamedKeyMapping` [ \ 

1087 `~lsst.daf.butler.DatasetType`, \ 

1088 `~collections.abc.Sequence` [ `lsst.daf.butler.DatasetRef` ] ]

1089 Adjusted neighbors, in the form used by `lsst.daf.butler.Quantum`. 

1090 

1091 Returns 

1092 ------- 

1093 removed : `set` [ `DatasetKey` ] 

1094 Datasets in ``original`` that have no counterpart in ``adjusted``. 

1095 """ 

1096 result = set(original) 

1097 for dataset_type, kept_refs in adjusted.items(): 

1098 parent_dataset_type_name, _ = DatasetType.splitDatasetTypeName(dataset_type.name) 

1099 for kept_ref in kept_refs: 

1100 # We don't know if this was a DatasetKey or a 

1101 # PrerequisiteDatasetKey; just try both. 

1102 result.discard(DatasetKey(parent_dataset_type_name, kept_ref.dataId.required_values)) 

1103 result.discard(PrerequisiteDatasetKey(parent_dataset_type_name, kept_ref.id.bytes)) 

1104 return result 

1105 

1106 

1107@dataclasses.dataclass(eq=False, order=False) 

1108class ExistingDatasets: 

1109 """Struct that holds the results of dataset queries for 

1110 `QuantumGraphBuilder`. 

1111 """ 

1112 

1113 inputs: dict[DatasetKey | PrerequisiteDatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1114 """Overall-input datasets found in `QuantumGraphBuilder.input_collections`. 

1115 

1116 This may include prerequisite inputs. It does include init-inputs. 

1117 It does not include intermediates. 

1118 """ 

1119 

1120 outputs_for_skip: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1121 """Output datasets found in `QuantumGraphBuilder.skip_existing_in`. 

1122 

1123 It is unspecified whether this includes init-outputs; there is

1124 no concept of skipping at the init stage, so this is not expected to 

1125 matter. 

1126 """ 

1127 

1128 outputs_in_the_way: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict) 

1129 """Output datasets found in `QuantumGraphBuilder.output_run`. 

1130 

1131 This includes regular outputs and init-outputs. 

1132 """ 

1133 

1134 

1135def _quantum_or_quanta(n: int) -> str: 

1136 """Correctly pluralize 'quantum' if needed.""" 

1137 return f"{n} quanta" if n != 1 else "1 quantum"
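
For example::

    >>> _quantum_or_quanta(1)
    '1 quantum'
    >>> _quantum_or_quanta(3)
    '3 quanta'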