Coverage for python/lsst/pipe/base/quantum_graph_builder.py: 25%

370 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""The base class for the QuantumGraph-generation algorithm and various 

23helper classes. 

24""" 

25 

26from __future__ import annotations 

27 

28__all__ = ( 

29 "QuantumGraphBuilder", 

30 "ExistingDatasets", 

31 "QuantumGraphBuilderError", 

32 "OutputExistsError", 

33 "PrerequisiteMissingError", 

34) 

35 

36import dataclasses 

37from abc import ABC, abstractmethod 

38from collections.abc import Iterable, Mapping, Sequence 

39from typing import TYPE_CHECKING, Any, final 

40 

41from lsst.daf.butler import ( 

42 Butler, 

43 CollectionType, 

44 DataCoordinate, 

45 DatasetRef, 

46 DatasetType, 

47 DimensionUniverse, 

48 Quantum, 

49) 

50from lsst.daf.butler.core.named import NamedKeyDict, NamedKeyMapping 

51from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError 

52from lsst.utils.logging import LsstLogAdapter, getLogger 

53from lsst.utils.timer import timeMethod 

54 

55from . import automatic_connection_constants as acc 

56from ._status import NoWorkFound 

57from ._task_metadata import TaskMetadata 

58from .connections import AdjustQuantumHelper 

59from .graph import QuantumGraph 

60from .pipeline_graph import PipelineGraph, TaskNode 

61from .prerequisite_helpers import PrerequisiteInfo, SkyPixBoundsBuilder, TimespanBuilder 

62from .quantum_graph_skeleton import ( 

63 DatasetKey, 

64 PrerequisiteDatasetKey, 

65 QuantumGraphSkeleton, 

66 QuantumKey, 

67 TaskInitKey, 

68) 

69 

70if TYPE_CHECKING: 

71 from .pipeline import TaskDef 

72 

73 

class QuantumGraphBuilderError(Exception):
    """Base class for all exceptions raised by `QuantumGraphBuilder`."""

78 

79 

class GraphBuilderError(QuantumGraphBuilderError):
    """Backwards-compatibility near-alias for QuantumGraphBuilderError."""

84 

85 

# Inherit from backwards-compatibility alias for backwards-compatibility.
class OutputExistsError(GraphBuilderError):
    """Exception raised when predicted output datasets already exist."""

91 

92 

# Inherit from backwards-compatibility alias for backwards-compatibility.
class PrerequisiteMissingError(GraphBuilderError):
    """Exception raised when a prerequisite dataset does not exist."""

98 

99 

class InitInputMissingError(QuantumGraphBuilderError):
    """Exception raised when an init-input dataset does not exist."""

104 

105 

106class QuantumGraphBuilder(ABC): 

107 """An abstract base class for building `QuantumGraph` objects from a 

108 pipeline. 

109 

110 Parameters 

111 ---------- 

112 pipeline_graph : `.pipeline_graph.PipelineGraph` 

113 Pipeline to build a `QuantumGraph` from, as a graph. Will be resolved 

114 in-place with the given butler (any existing resolution is ignored). 

115 butler : `lsst.daf.butler.Butler` 

116 Client for the data repository. Should be read-only. 

117 input_collections : `~collections.abc.Sequence` [ `str` ], optional 

118 Collections to search for overall-input datasets. If not provided, 

119 ``butler.collections`` is used (and must not be empty). 

120 output_run : `str`, optional 

121 Output `~lsst.daf.butler.CollectionType.RUN` collection. If not 

122 provided, ``butler.run`` is used (and must not be `None`). 

123 skip_existing_in : `~collections.abc.Sequence` [ `str` ], optional 

124 Collections to search for outputs that already exist for the purpose of 

125 skipping quanta that have already been run. 

126 clobber : `bool`, optional 

127 Whether to raise if predicted outputs already exist in ``output_run`` 

128 (not including those quanta that would be skipped because they've 

129 already been run). This never actually clobbers outputs; it just 

130 informs the graph generation algorithm whether execution will run with 

131 clobbering enabled. This is ignored if ``output_run`` does not exist. 

132 

133 Notes 

134 ----- 

135 Constructing a `QuantumGraphBuilder` will run queries for existing datasets 

136 with empty data IDs (including but not limited to init inputs and outputs), 

137 in addition to resolving the given pipeline graph and testing for existence 

138 of the ``output`` run collection. 

139 

140 The `build` method splits the pipeline graph into independent subgraphs, 

141 then calls the abstract method `process_subgraph` on each, to allow 

142 concrete implementations to populate the rough graph structure (the 

143 `~quantum_graph_skeleton.QuantumGraphSkeleton` class) and search for 

144 existing datasets (further populating the builder's `existing_datasets` 

145 struct). The `build` method then: 

146 

147 - assembles `lsst.daf.butler.Quantum` instances from all data IDs in the 

148 skeleton; 

149 - looks for existing outputs found in ``skip_existing_in`` to see if any 

150 quanta should be skipped; 

151 - calls `PipelineTaskConnections.adjustQuantum` on all quanta, adjusting 

152 downstream quanta appropriately when preliminary predicted outputs are 

153 rejected (pruning nodes that will not have the inputs they need to run); 

154 - attaches datastore records and registry dataset types to the graph. 

155 

156 In addition to implementing `process_subgraph`, derived classes are 

157 generally expected to add new construction keyword-only arguments to 

158 control the data IDs of the quantum graph, while forwarding all of the 

159 arguments defined in the base class to `super`. 

160 """ 

161 

    def __init__(
        self,
        pipeline_graph: PipelineGraph,
        butler: Butler,
        *,
        input_collections: Sequence[str] | None = None,
        output_run: str | None = None,
        skip_existing_in: Sequence[str] = (),
        clobber: bool = False,
    ):
        # See the class docstring for parameter documentation.
        self.log = getLogger(__name__)
        self.metadata = TaskMetadata()
        self._pipeline_graph = pipeline_graph
        self.butler = butler
        # Resolve the pipeline graph against the data repository in-place;
        # per the class docstring, any existing resolution is ignored.
        self._pipeline_graph.resolve(self.butler.registry)
        # Fall back to the butler's defaults for collections, and reject
        # empty/None results up front so later queries have valid targets.
        if input_collections is None:
            input_collections = butler.collections
        if not input_collections:
            raise ValueError("No input collections provided.")
        self.input_collections = input_collections
        if output_run is None:
            output_run = butler.run
        if not output_run:
            raise ValueError("No output RUN collection provided.")
        self.output_run = output_run
        self.skip_existing_in = skip_existing_in
        self.empty_data_id = DataCoordinate.makeEmpty(butler.dimensions)
        self.clobber = clobber
        # See whether the output run already exists.
        self.output_run_exists = False
        try:
            if self.butler.registry.getCollectionType(self.output_run) is not CollectionType.RUN:
                raise RuntimeError(f"{self.output_run!r} is not a RUN collection.")
            self.output_run_exists = True
        except MissingCollectionError:
            # If the run doesn't exist we never need to clobber. This is not
            # an error so you can run with clobber=True the first time you
            # attempt some processing as well as all subsequent times, instead
            # of forcing the user to make the first attempt different.
            self.clobber = False
        # We need to know whether the skip_existing_in collection sequence
        # starts with the output run collection, as an optimization to avoid
        # queries later.
        try:
            skip_existing_in_flat = self.butler.registry.queryCollections(
                self.skip_existing_in, flattenChains=True
            )
        except MissingCollectionError:
            skip_existing_in_flat = []
        if not skip_existing_in_flat:
            self.skip_existing_in = []
        if self.skip_existing_in and self.output_run_exists:
            self.skip_existing_starts_with_output_run = self.output_run == skip_existing_in_flat[0]
        else:
            self.skip_existing_starts_with_output_run = False
        self.existing_datasets = ExistingDatasets()
        # Prefer the data repository's registered storage class for the
        # special "packages" init-output dataset type; fall back to the
        # default from automatic_connection_constants if it isn't registered.
        try:
            packages_storage_class = butler.registry.getDatasetType(
                acc.PACKAGES_INIT_OUTPUT_NAME
            ).storageClass_name
        except MissingDatasetTypeError:
            packages_storage_class = acc.PACKAGES_INIT_OUTPUT_STORAGE_CLASS
        self._global_init_output_types = {
            acc.PACKAGES_INIT_OUTPUT_NAME: DatasetType(
                acc.PACKAGES_INIT_OUTPUT_NAME,
                self.universe.empty,
                packages_storage_class,
            )
        }
        # Query up front for existing datasets with empty data IDs (see the
        # class docstring's Notes section).
        self._find_empty_dimension_datasets()
        self.prerequisite_info = {
            task_node.label: PrerequisiteInfo(task_node, self._pipeline_graph)
            for task_node in pipeline_graph.tasks.values()
        }

236 

    # Class-level annotations documenting the instance attributes that are
    # assigned in ``__init__``.

    log: LsstLogAdapter
    """Logger to use for all quantum-graph generation messages.

    General and per-task status messages should be logged at `~logging.INFO`
    level or higher, per-dataset-type status messages should be logged at
    `~lsst.utils.logging.VERBOSE` or higher, and per-data-ID status messages
    should be logged at `logging.DEBUG` or higher.
    """

    metadata: TaskMetadata
    """Metadata to store in the QuantumGraph.

    The `TaskMetadata` class is used here primarily in order to enable
    resource-usage collection with the `lsst.utils.timer.timeMethod` decorator.
    """

    butler: Butler
    """Client for the data repository.

    Should be read-only.
    """

    input_collections: Sequence[str]
    """Collections to search for overall-input datasets.
    """

    output_run: str
    """Output `~lsst.daf.butler.CollectionType.RUN` collection.
    """

    skip_existing_in: Sequence[str]
    """Collections to search for outputs that already exist for the purpose
    of skipping quanta that have already been run.
    """

    clobber: bool
    """Whether to raise if predicted outputs already exist in ``output_run``.

    This never actually clobbers outputs; it just informs the graph generation
    algorithm whether execution will run with clobbering enabled. This is
    always `False` if `output_run_exists` is `False`.
    """

    empty_data_id: DataCoordinate
    """An empty data ID in the data repository's dimension universe.
    """

    output_run_exists: bool
    """Whether the output run exists in the data repository already.
    """

    skip_existing_starts_with_output_run: bool
    """Whether the `skip_existing_in` sequence begins with `output_run`.

    If this is true, any dataset found in `output_run` can be used to
    short-circuit queries in `skip_existing_in`.
    """

    existing_datasets: ExistingDatasets
    """Struct holding datasets that have already been found in the data
    repository.

    This is updated in-place as the `QuantumGraph` generation algorithm
    proceeds.
    """

    prerequisite_info: Mapping[str, PrerequisiteInfo]
    """Helper objects for finding prerequisite inputs, organized by task label.

    Subclasses that find prerequisites should remove the
    covered `~prerequisite_helpers.PrerequisiteFinder` objects from this
    attribute.
    """

310 

311 @property 

312 def universe(self) -> DimensionUniverse: 

313 """Definitions of all data dimensions.""" 

314 return self.butler.dimensions 

315 

    @final
    @timeMethod
    def build(self, metadata: Mapping[str, Any] | None = None) -> QuantumGraph:
        """Build the quantum graph.

        Parameters
        ----------
        metadata : `~collections.abc.Mapping`, optional
            Flexible metadata to add to the quantum graph.

        Returns
        -------
        quantum_graph : `QuantumGraph`
            DAG describing processing to be performed.

        Notes
        -----
        External code is expected to construct a `QuantumGraphBuilder` and then
        call this method exactly once. See class documentation for details on
        what it does.
        """
        full_skeleton = QuantumGraphSkeleton(self._pipeline_graph.tasks)
        # Delegate each independent pipeline subgraph to the concrete
        # implementation, merging the results into one skeleton.
        subgraphs = list(self._pipeline_graph.split_independent())
        for i, subgraph in enumerate(subgraphs):
            self.log.info(
                "Processing pipeline subgraph %d of %d with %d task(s).",
                i + 1,
                len(subgraphs),
                len(subgraph.tasks),
            )
            self.log.verbose("Subgraph tasks: [%s]", ", ".join(label for label in subgraph.tasks))
            subgraph_skeleton = self.process_subgraph(subgraph)
            full_skeleton.update(subgraph_skeleton)
        # Loop over tasks. The pipeline graph must be topologically sorted,
        # so a quantum is only processed after any quantum that provides its
        # inputs has been processed.
        for task_node in self._pipeline_graph.tasks.values():
            self._resolve_task_quanta(task_node, full_skeleton)
        # Add global init-outputs to the skeleton.
        for dataset_type in self._global_init_output_types.values():
            dataset_key = full_skeleton.add_dataset_node(
                dataset_type.name, self.empty_data_id, is_global_init_output=True
            )
            # Reuse a ref already sitting in the output run if there is one;
            # otherwise predict a new one in the output run.
            ref = self.existing_datasets.outputs_in_the_way.get(dataset_key)
            if ref is None:
                ref = DatasetRef(dataset_type, self.empty_data_id, run=self.output_run)
            full_skeleton[dataset_key]["ref"] = ref
        # Remove dataset nodes with no edges that are not global init outputs,
        # which are generally overall-inputs whose original quanta end up
        # skipped or with no work to do (we can't remove these along with the
        # quanta because no quantum knows if it's the only consumer).
        full_skeleton.remove_orphan_datasets()
        self._attach_datastore_records(full_skeleton)
        # TODO initialize most metadata here instead of in ctrl_mpexec.
        if metadata is None:
            metadata = {}
        return self._construct_quantum_graph(full_skeleton, metadata)

373 

    @abstractmethod
    def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton:
        """Build the rough structure for an independent subset of the
        `QuantumGraph` and query for relevant existing datasets.

        Parameters
        ----------
        subgraph : `.pipeline_graph.PipelineGraph`
            Subset of the pipeline graph that should be processed by this call.
            This is always resolved and topologically sorted. It should not be
            modified.

        Returns
        -------
        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
            Class representing an initial quantum graph. See
            `quantum_graph_skeleton.QuantumGraphSkeleton` docs for details.
            After this is returned, the object may be modified in-place in
            unspecified ways.

        Notes
        -----
        In addition to returning a
        `quantum_graph_skeleton.QuantumGraphSkeleton`, this method should
        populate the `existing_datasets` structure by querying for all relevant
        datasets with non-empty data IDs (those with empty data IDs will
        already be present). In particular:

        - `~ExistingDatasets.inputs` must always be populated with all
          overall-input datasets (but not prerequisites), by querying
          `input_collections`;
        - `~ExistingDatasets.outputs_for_skip` must be populated with any
          intermediate or output datasets present in `skip_existing_in` (it
          can be ignored if `skip_existing_in` is empty);
        - `~ExistingDatasets.outputs_in_the_way` must be populated with any
          intermediate or output datasets present in `output_run`, if
          `output_run_exists` (it can be ignored if `output_run_exists` is
          `False`). Note that the presence of such datasets is not
          automatically an error, even if `clobber` is `False`, as these may be
          quanta that will be skipped.
        - `~ExistingDatasets.inputs` must be populated with all
          prerequisite-input datasets that were included in the skeleton, by
          querying `input_collections` (not all prerequisite inputs need to be
          included in the skeleton, but the base class can only use per-quantum
          queries to find them, and that can be slow when there are many
          quanta).

        Dataset types should never be components and should always use the
        "common" storage class definition in `pipeline_graph.DatasetTypeNode`
        (which is the data repository definition when the dataset type is
        registered).
        """
        raise NotImplementedError()

427 

428 @final 

429 @timeMethod 

430 def _resolve_task_quanta(self, task_node: TaskNode, skeleton: QuantumGraphSkeleton) -> None: 

431 """Process the quanta for one task in a skeleton graph to skip those 

432 that have already completed and adjust those that request it. 

433 

434 Parameters 

435 ---------- 

436 task_node : `pipeline_graph.TaskNode` 

437 Node for this task in the pipeline graph. 

438 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

439 Preliminary quantum graph, to be modified in-place. 

440 

441 Notes 

442 ----- 

443 This method modifies ``skeleton`` in-place in several ways: 

444 

445 - It adds a "ref" attribute to dataset nodes, using the contents of 

446 `existing_datasets`. This ensures producing and consuming tasks 

447 start from the same `DatasetRef`. 

448 - It adds "inputs", "outputs", and "init_inputs" attributes to the 

449 quantum nodes, holding the same `NamedValueMapping` objects needed to 

450 construct an actual `Quantum` instances. 

451 - It removes quantum nodes that are to be skipped because their outputs 

452 already exist in `skip_existing_in`. It also removes their outputs 

453 from `ExistingDatasets.outputs_in_the_way`. 

454 - It adds prerequisite dataset nodes and edges that connect them to the 

455 quanta that consume them. 

456 - It removes quantum nodes whose 

457 `~PipelineTaskConnections.adjustQuantum` calls raise `NoWorkFound` or 

458 predict no outputs; 

459 - It removes the nodes of output datasets that are "adjusted away". 

460 - It removes the edges of input datasets that are "adjusted away". 

461 

462 The difference between how adjusted inputs and outputs are handled 

463 reflects the fact that many quanta can share the same input, but only 

464 one produces each output. This can lead to the graph having 

465 superfluous isolated nodes after processing is complete, but these 

466 should only be removed after all the quanta from all tasks have been 

467 processed. 

468 """ 

469 # Extract the helper object for the prerequisite inputs of this task, 

470 # and tell it to prepare to construct skypix bounds and timespans for 

471 # each quantum (these will automatically do nothing if nothing needs 

472 # those bounds). 

473 task_prerequisite_info = self.prerequisite_info[task_node.label] 

474 task_prerequisite_info.update_bounds() 

475 # Loop over all quanta for this task, remembering the ones we've 

476 # gotten rid of. 

477 skipped_quanta = [] 

478 no_work_quanta = [] 

479 for quantum_key in skeleton.get_quanta(task_node.label): 

480 if self._skip_quantum_if_metadata_exists(task_node, quantum_key, skeleton): 

481 skipped_quanta.append(quantum_key) 

482 continue 

483 quantum_data_id = skeleton[quantum_key]["data_id"] 

484 skypix_bounds_builder = task_prerequisite_info.bounds.make_skypix_bounds_builder(quantum_data_id) 

485 timespan_builder = task_prerequisite_info.bounds.make_timespan_builder(quantum_data_id) 

486 adjusted_outputs = self._gather_quantum_outputs( 

487 task_node, quantum_key, skeleton, skypix_bounds_builder, timespan_builder 

488 ) 

489 adjusted_inputs = self._gather_quantum_inputs( 

490 task_node, 

491 quantum_key, 

492 skeleton, 

493 task_prerequisite_info, 

494 skypix_bounds_builder, 

495 timespan_builder, 

496 ) 

497 # Give the task's Connections class an opportunity to remove 

498 # some inputs, or complain if they are unacceptable. This will 

499 # raise if one of the check conditions is not met, which is the 

500 # intended behavior. 

501 helper = AdjustQuantumHelper(inputs=adjusted_inputs, outputs=adjusted_outputs) 

502 try: 

503 helper.adjust_in_place( 

504 task_node._get_imported_data().connections, task_node.label, quantum_data_id 

505 ) 

506 except NoWorkFound as err: 

507 # Do not generate this quantum; it would not produce any 

508 # outputs. Remove it and all of the outputs it might have 

509 # produced from the skeleton. 

510 try: 

511 _, connection_name, _ = err.args 

512 details = f"not enough datasets for connection {connection_name}." 

513 except ValueError: 

514 details = str(err) 

515 self.log.debug( 

516 "No work found for quantum %s of task %s: %s", 

517 quantum_key.data_id_values, 

518 quantum_key.task_label, 

519 details, 

520 ) 

521 no_work_quanta.append(quantum_key) 

522 continue 

523 if helper.outputs_adjusted: 

524 if not any(adjusted_refs for adjusted_refs in helper.outputs.values()): 

525 # No outputs also means we don't generate this quantum. 

526 self.log.debug( 

527 "No outputs predicted for quantum %s of task %s.", 

528 quantum_key.data_id_values, 

529 quantum_key.task_label, 

530 ) 

531 no_work_quanta.append(quantum_key) 

532 continue 

533 # Remove output nodes that were not retained by 

534 # adjustQuantum. 

535 skeleton.remove_dataset_nodes( 

536 self._find_removed(skeleton.iter_outputs_of(quantum_key), helper.outputs) 

537 ) 

538 if helper.inputs_adjusted: 

539 if not any(bool(adjusted_refs) for adjusted_refs in helper.inputs.values()): 

540 raise QuantumGraphBuilderError( 

541 f"adjustQuantum implementation for {task_node.label}@{quantum_key.data_id_values} " 

542 "returned outputs but no inputs." 

543 ) 

544 # Remove input dataset edges that were not retained by 

545 # adjustQuantum. We can't remove the input dataset nodes 

546 # because some other quantum might still want them. 

547 skeleton.remove_input_edges( 

548 quantum_key, self._find_removed(skeleton.iter_inputs_of(quantum_key), helper.inputs) 

549 ) 

550 # Save the adjusted inputs and outputs to the quantum node's 

551 # state so we don't have to regenerate those data structures 

552 # from the graph. 

553 skeleton[quantum_key]["inputs"] = helper.inputs 

554 skeleton[quantum_key]["outputs"] = helper.outputs 

555 for no_work_quantum in no_work_quanta: 

556 skeleton.remove_quantum_node(no_work_quantum, remove_outputs=True) 

557 for skipped_quantum in skipped_quanta: 

558 skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False) 

559 remaining_quanta = skeleton.get_quanta(task_node.label) 

560 self._resolve_task_init(task_node, skeleton, bool(skipped_quanta)) 

561 message_terms = [] 

562 if no_work_quanta: 

563 message_terms.append(f"{len(no_work_quanta)} had no work to do") 

564 if skipped_quanta: 

565 message_terms.append(f"{len(no_work_quanta)} previously succeeded") 

566 message_parenthetical = f" ({', '.join(message_terms)})" if message_terms else "" 

567 if remaining_quanta: 

568 self.log.info( 

569 "Generated %s for task %s%s.", 

570 _quantum_or_quanta(len(remaining_quanta)), 

571 task_node.label, 

572 message_parenthetical, 

573 ) 

574 else: 

575 self.log.info( 

576 "Dropping task %s because no quanta remain%s.", task_node.label, message_parenthetical 

577 ) 

578 skeleton.remove_task(task_node.label) 

579 

    def _skip_quantum_if_metadata_exists(
        self, task_node: TaskNode, quantum_key: QuantumKey, skeleton: QuantumGraphSkeleton
    ) -> bool:
        """Identify and drop quanta that should be skipped because their
        metadata datasets already exist.

        Parameters
        ----------
        task_node : `pipeline_graph.TaskNode`
            Node for this task in the pipeline graph.
        quantum_key : `QuantumKey`
            Identifier for this quantum in the graph.
        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
            Preliminary quantum graph, to be modified in-place.

        Returns
        -------
        skipped : `bool`
            `True` if the quantum is being skipped and has been removed from
            the graph, `False` otherwise.

        Notes
        -----
        If the metadata dataset for this quantum exists in
        `ExistingDatasets.outputs_for_skip`, the quantum will be skipped. This
        causes the quantum node to be removed from the graph. Dataset nodes
        that were previously the outputs of this quantum will have their "ref"
        attribute set from `ExistingDatasets.outputs_for_skip`, or will be
        removed if there is no such dataset there. Any output dataset in
        `ExistingDatasets.outputs_in_the_way` will be removed.
        """
        metadata_dataset_key = DatasetKey(
            task_node.metadata_output.parent_dataset_type_name, quantum_key.data_id_values
        )
        if metadata_dataset_key in self.existing_datasets.outputs_for_skip:
            # This quantum's metadata is already present in the
            # skip_existing_in collections; we'll skip it. But the presence of
            # the metadata dataset doesn't guarantee that all of the other
            # outputs we predicted are present; we have to check.
            for output_dataset_key in list(skeleton.iter_outputs_of(quantum_key)):
                if (
                    output_ref := self.existing_datasets.outputs_for_skip.get(output_dataset_key)
                ) is not None:
                    # Populate the skeleton graph's node attributes
                    # with the existing DatasetRef, just like a
                    # predicted output of a non-skipped quantum.
                    skeleton[output_dataset_key]["ref"] = output_ref
                else:
                    # Remove this dataset from the skeleton graph,
                    # because the quantum that would have produced it
                    # is being skipped and it doesn't already exist.
                    skeleton.remove_dataset_nodes([output_dataset_key])
                # If this dataset was "in the way" (i.e. already in the
                # output run), it isn't anymore.
                self.existing_datasets.outputs_in_the_way.pop(output_dataset_key, None)
            # Removing the quantum node from the graph will happen outside this
            # function.
            return True
        return False

639 

    @final
    def _gather_quantum_outputs(
        self,
        task_node: TaskNode,
        quantum_key: QuantumKey,
        skeleton: QuantumGraphSkeleton,
        skypix_bounds_builder: SkyPixBoundsBuilder,
        timespan_builder: TimespanBuilder,
    ) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Collect outputs or generate datasets for a preliminary quantum and
        put them in the form used by `~lsst.daf.butler.Quantum` and
        `~PipelineTaskConnections.adjustQuantum`.

        Parameters
        ----------
        task_node : `pipeline_graph.TaskNode`
            Node for this task in the pipeline graph.
        quantum_key : `QuantumKey`
            Identifier for this quantum in the graph.
        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
            Preliminary quantum graph, to be modified in-place.
        skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
            An object that accumulates the appropriate spatial bounds for a
            quantum.
        timespan_builder : `~prerequisite_helpers.TimespanBuilder`
            An object that accumulates the appropriate timespan for a quantum.

        Returns
        -------
        outputs : `~lsst.daf.butler.NamedKeyDict` [ \
                `~lsst.daf.butler.DatasetType`, `list` [ \
                `~lsst.daf.butler.DatasetRef` ] ]
            All outputs to the task, using the storage class and components
            defined by the task's own connections.

        Notes
        -----
        This first looks for outputs already present in the `output_run` by
        looking in `ExistingDatasets.outputs_in_the_way`; if it finds something
        and `clobber` is `True`, it uses that ref (it's not ideal that both the
        original dataset and its replacement will have the same UUID, but we
        don't have space in the quantum graph for two UUIDs, and we need the
        datastore records of the original there). If `clobber` is `False`,
        `OutputExistsError` is raised. If there is no output already present,
        a new one with a random UUID is generated. In all cases the "ref"
        attribute of the dataset node in the skeleton is set.
        """
        # First pass: resolve (or predict) a ref for every output node of
        # this quantum, grouped by parent dataset type name.
        outputs_by_type: dict[str, list[DatasetRef]] = {}
        dataset_key: DatasetKey
        for dataset_key in skeleton.iter_outputs_of(quantum_key):
            dataset_data_id = skeleton[dataset_key]["data_id"]
            dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name]
            if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None:
                ref = DatasetRef(dataset_type_node.dataset_type, dataset_data_id, run=self.output_run)
            elif not self.clobber:
                # We intentionally raise here, before running adjustQuantum,
                # because it'd be weird if we left an old potential output of a
                # task sitting there in the output collection, just because the
                # task happened to not actually produce it.
                raise OutputExistsError(
                    f"Potential output dataset {ref} already exists in the output run "
                    f"{self.output_run}, but clobbering outputs was not expected to be necessary."
                )
            skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
            timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
            skeleton[dataset_key]["ref"] = ref
            outputs_by_type.setdefault(dataset_key.parent_dataset_type_name, []).append(ref)
        # Second pass: adapt the refs to each output connection's own storage
        # class / component definitions, keyed by the edge's dataset type.
        adapted_outputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for write_edge in task_node.iter_all_outputs():
            dataset_type_node = self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name]
            edge_dataset_type = write_edge.adapt_dataset_type(dataset_type_node.dataset_type)
            adapted_outputs[edge_dataset_type] = [
                write_edge.adapt_dataset_ref(ref)
                for ref in sorted(outputs_by_type.get(write_edge.parent_dataset_type_name, []))
            ]
        return adapted_outputs

716 

    @final
    def _gather_quantum_inputs(
        self,
        task_node: TaskNode,
        quantum_key: QuantumKey,
        skeleton: QuantumGraphSkeleton,
        task_prerequisite_info: PrerequisiteInfo,
        skypix_bounds_builder: SkyPixBoundsBuilder,
        timespan_builder: TimespanBuilder,
    ) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Collect input datasets for a preliminary quantum and put them in the
        form used by `~lsst.daf.butler.Quantum` and
        `~PipelineTaskConnections.adjustQuantum`.

        Parameters
        ----------
        task_node : `pipeline_graph.TaskNode`
            Node for this task in the pipeline graph.
        quantum_key : `QuantumKey`
            Identifier for this quantum in the graph.
        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
            Preliminary quantum graph, to be modified in-place.
        task_prerequisite_info : `~prerequisite_helpers.PrerequisiteInfo`
            Helper struct whose ``finders`` are used to query for any
            prerequisite inputs not already present in the skeleton.
        skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
            An object that accumulates the appropriate spatial bounds for a
            quantum.
        timespan_builder : `~prerequisite_helpers.TimespanBuilder`
            An object that accumulates the appropriate timespan for a quantum.

        Returns
        -------
        inputs : `~lsst.daf.butler.NamedKeyDict` [ \
                `~lsst.daf.butler.DatasetType`, `list` [ \
                `~lsst.daf.butler.DatasetRef` ] ]
            All regular and prerequisite inputs to the task, using the storage
            class and components defined by the task's own connections.

        Notes
        -----
        On return, the dataset nodes that represent inputs to this quantum will
        either have their "ref" attribute set (using the common dataset type,
        not the task-specific one) or will be removed from the graph.

        For regular inputs, usually an existing "ref" (corresponding to an
        output of another quantum) will be found and left unchanged.  When
        there is no existing "ref" attribute, `ExistingDatasets.inputs` is
        searched next; if there is nothing there, the input will be removed.

        Prerequisite inputs are always queried for directly here (delegating to
        `_find_prerequisite_inputs`).  They are never produced by other tasks,
        and cannot in general be queried for in advance when
        `ExistingDatasets.inputs` is populated.
        """
        quantum_data_id = skeleton[quantum_key]["data_id"]
        inputs_by_type: dict[str, set[DatasetRef]] = {}
        dataset_key: DatasetKey | PrerequisiteDatasetKey
        # Process inputs already present in the skeleton - this should include
        # all regular inputs (including intermediates) and may include some
        # prerequisites.
        # NOTE: iterate over a list copy because remove_dataset_nodes below
        # mutates the graph we are iterating over.
        for dataset_key in list(skeleton.iter_inputs_of(quantum_key)):
            if (ref := skeleton[dataset_key].get("ref")) is None:
                # This dataset is an overall input - if it was an intermediate,
                # we would have already either removed the node or set the
                # "ref" attribute when processing its producing quantum - and
                # this is the first time we're trying to resolve it.
                if (ref := self.existing_datasets.inputs.get(dataset_key)) is None:
                    # It also doesn't exist in the input collections, so we
                    # remove its node in the skeleton graph (so other consumers
                    # won't have to check for it).
                    skeleton.remove_dataset_nodes([dataset_key])
                    continue
                skeleton[dataset_key]["ref"] = ref
            inputs_by_type.setdefault(dataset_key.parent_dataset_type_name, set()).add(ref)
            skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
            timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
        # Query for any prerequisites not handled by process_subgraph.  Note
        # that these were not already in the skeleton graph, so we add them
        # now.
        skypix_bounds = skypix_bounds_builder.finish()
        timespan = timespan_builder.finish()
        for finder in task_prerequisite_info.finders.values():
            inputs_for_type = inputs_by_type.setdefault(finder.dataset_type_node.name, set())
            dataset_keys = []
            for ref in finder.find(
                self.butler, self.input_collections, quantum_data_id, skypix_bounds, timespan
            ):
                dataset_key = skeleton.add_prerequisite_node(ref.datasetType.name, ref=ref)
                dataset_keys.append(dataset_key)
                inputs_for_type.add(ref)
            skeleton.add_input_edges(quantum_key, dataset_keys)
        # Convert the accumulated refs to the storage classes / components
        # declared by the task's own connections.
        adapted_inputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for read_edge in task_node.iter_all_inputs():
            dataset_type_node = self._pipeline_graph.dataset_types[read_edge.parent_dataset_type_name]
            edge_dataset_type = read_edge.adapt_dataset_type(dataset_type_node.dataset_type)
            if (current_dataset_type := adapted_inputs.keys().get(edge_dataset_type.name)) is None:
                adapted_inputs[edge_dataset_type] = [
                    read_edge.adapt_dataset_ref(ref)
                    for ref in sorted(inputs_by_type.get(read_edge.parent_dataset_type_name, frozenset()))
                ]
            elif current_dataset_type != edge_dataset_type:
                raise NotImplementedError(
                    f"Task {task_node.label!r} has {edge_dataset_type.name!r} as an input via "
                    "two different connections, with two different storage class overrides. "
                    "This is not yet supported due to limitations in the Quantum data structure."
                )
            # If neither the `if` nor the `elif` above match, it means
            # multiple input connections have exactly the same dataset
            # type, and hence nothing to do after the first one.
        return adapted_inputs

825 

826 @final 

827 def _resolve_task_init( 

828 self, task_node: TaskNode, skeleton: QuantumGraphSkeleton, has_skipped_quanta: bool 

829 ) -> None: 

830 """Add init-input and init-output dataset nodes and edges for a task to 

831 the skeleton. 

832 

833 Parameters 

834 ---------- 

835 task_node : `pipeline_graph.TaskNode` 

836 Pipeline graph description of the task. 

837 skeleton : `QuantumGraphSkeleton` 

838 In-progress quantum graph data structure to update in-place. 

839 has_skipped_quanta : `bool` 

840 Whether any of this task's quanta were skipped because they had 

841 already succeeded. 

842 """ 

843 quanta = skeleton.get_quanta(task_node.label) 

844 task_init_key = TaskInitKey(task_node.label) 

845 if quanta: 

846 adapted_inputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

847 # Process init-inputs. 

848 input_keys: list[DatasetKey] = [] 

849 for read_edge in task_node.init.iter_all_inputs(): 

850 dataset_key = skeleton.add_dataset_node( 

851 read_edge.parent_dataset_type_name, self.empty_data_id 

852 ) 

853 skeleton.add_input_edge(task_init_key, dataset_key) 

854 if (ref := skeleton[dataset_key].get("ref")) is None: 

855 try: 

856 ref = self.existing_datasets.inputs[dataset_key] 

857 except KeyError: 

858 raise InitInputMissingError( 

859 f"Overall init-input dataset {read_edge.parent_dataset_type_name!r} " 

860 f"needed by task {task_node.label!r} not found in input collection(s) " 

861 f"{self.input_collections}." 

862 ) from None 

863 skeleton[dataset_key]["ref"] = ref 

864 for quantum_key in skeleton.get_quanta(task_node.label): 

865 skeleton.add_input_edge(quantum_key, dataset_key) 

866 input_keys.append(dataset_key) 

867 adapted_ref = read_edge.adapt_dataset_ref(ref) 

868 adapted_inputs[adapted_ref.datasetType] = adapted_ref 

869 # Save the quantum-adapted init inputs to each quantum, and add 

870 # skeleton edges connecting the init inputs to each quantum. 

871 for quantum_key in skeleton.get_quanta(task_node.label): 

872 skeleton[quantum_key]["init_inputs"] = adapted_inputs 

873 # Process init-outputs. 

874 adapted_outputs: NamedKeyDict[DatasetType, DatasetRef] = NamedKeyDict() 

875 for write_edge in task_node.init.iter_all_outputs(): 

876 dataset_key = skeleton.add_dataset_node( 

877 write_edge.parent_dataset_type_name, self.empty_data_id 

878 ) 

879 if (ref := self.existing_datasets.outputs_in_the_way.get(dataset_key)) is None: 

880 ref = DatasetRef( 

881 self._pipeline_graph.dataset_types[write_edge.parent_dataset_type_name].dataset_type, 

882 self.empty_data_id, 

883 run=self.output_run, 

884 ) 

885 skeleton[dataset_key]["ref"] = ref 

886 skeleton.add_output_edge(task_init_key, dataset_key) 

887 adapted_ref = write_edge.adapt_dataset_ref(ref) 

888 adapted_outputs[adapted_ref.datasetType] = adapted_ref 

889 skeleton[task_init_key]["inputs"] = adapted_inputs 

890 skeleton[task_init_key]["outputs"] = adapted_outputs 

891 elif has_skipped_quanta: 

892 # No quanta remain for this task, but at least one quantum was 

893 # skipped because its outputs were present in the skip_existing_in 

894 # collections. This means all init outputs should be present in 

895 # the skip_existing_in collections, too, and we need to put those 

896 # refs in the graph. 

897 for write_edge in task_node.init.iter_all_outputs(): 

898 dataset_key = skeleton.add_dataset_node( 

899 write_edge.parent_dataset_type_name, self.empty_data_id 

900 ) 

901 if (ref := self.existing_datasets.outputs_for_skip.get(dataset_key)) is None: 

902 raise InitInputMissingError( 

903 f"Init-output dataset {write_edge.parent_dataset_type_name!r} of skipped task " 

904 f"{task_node.label!r} not found in skip-existing-in collection(s) " 

905 f"{self.skip_existing_in}." 

906 ) from None 

907 skeleton[dataset_key]["ref"] = ref 

908 # If this dataset was "in the way" (i.e. already in the output 

909 # run), it isn't anymore. 

910 self.existing_datasets.outputs_in_the_way.pop(dataset_key, None) 

911 # No quanta remain in this task, but none were skipped; this means 

912 # they all got pruned because of NoWorkFound conditions. This 

913 # dooms all downstream quanta to the same fate, so we don't bother 

914 # doing anything with the task's init-outputs, since nothing is 

915 # going to consume them. 

916 

917 @final 

918 @timeMethod 

919 def _find_empty_dimension_datasets(self) -> None: 

920 """Query for all dataset types with no dimensions, updating 

921 `existing_datasets` in-place. 

922 

923 This includes but is not limited to init inputs and init outputs. 

924 """ 

925 _, dataset_type_nodes = self._pipeline_graph.group_by_dimensions()[self.universe.empty] 

926 dataset_types = [node.dataset_type for node in dataset_type_nodes.values()] 

927 dataset_types.extend(self._global_init_output_types.values()) 

928 for dataset_type in dataset_types: 

929 key = DatasetKey(dataset_type.name, self.empty_data_id.values_tuple()) 

930 if ( 

931 self._pipeline_graph.producer_of(dataset_type.name) is None 

932 and dataset_type.name not in self._global_init_output_types 

933 ): 

934 # Dataset type is an overall input; we always need to try to 

935 # find these. 

936 try: 

937 ref = self.butler.registry.findDataset( 

938 dataset_type.name, collections=self.input_collections 

939 ) 

940 except MissingDatasetTypeError: 

941 ref = None 

942 if ref is not None: 

943 self.existing_datasets.inputs[key] = ref 

944 elif self.skip_existing_in: 

945 # Dataset type is an intermediate or output; need to find these 

946 # if only they're from previously executed quanta that we might 

947 # skip... 

948 try: 

949 ref = self.butler.registry.findDataset( 

950 dataset_type.name, collections=self.skip_existing_in 

951 ) 

952 except MissingDatasetTypeError: 

953 ref = None 

954 if ref is not None: 

955 self.existing_datasets.outputs_for_skip[key] = ref 

956 if ref.run == self.output_run: 

957 self.existing_datasets.outputs_in_the_way[key] = ref 

958 if self.output_run_exists and not self.skip_existing_starts_with_output_run: 

959 # ...or if they're in the way and would need to be clobbered 

960 # (and we haven't already found them in the previous block). 

961 try: 

962 ref = self.butler.registry.findDataset(dataset_type.name, collections=[self.output_run]) 

963 except MissingDatasetTypeError: 

964 ref = None 

965 if ref is not None: 

966 self.existing_datasets.outputs_in_the_way[key] = ref 

967 

968 @final 

969 @timeMethod 

970 def _attach_datastore_records(self, skeleton: QuantumGraphSkeleton) -> None: 

971 """Add datastore records for all overall inputs to a preliminary 

972 quantum graph. 

973 

974 Parameters 

975 ---------- 

976 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

977 Preliminary quantum graph to update in place. 

978 

979 Notes 

980 ----- 

981 On return, all quantum nodes in the skeleton graph will have a 

982 "datastore_records" attribute that is a mapping from datastore name 

983 to `lsst.daf.butler.DatastoreRecordData`, as used by 

984 `lsst.daf.butler.Quantum`. 

985 """ 

986 overall_inputs = skeleton.extract_overall_inputs() 

987 exported_records = self.butler._datastore.export_records(overall_inputs.values()) 

988 for quantum_key in skeleton.iter_all_quanta(): 

989 quantum_records = {} 

990 input_ids = { 

991 ref.id 

992 for dataset_key in skeleton.iter_inputs_of(quantum_key) 

993 if (ref := overall_inputs.get(dataset_key)) is not None 

994 } 

995 if input_ids: 

996 for datastore_name, records in exported_records.items(): 

997 matching_records = records.subset(input_ids) 

998 if matching_records is not None: 

999 quantum_records[datastore_name] = matching_records 

1000 skeleton[quantum_key]["datastore_records"] = quantum_records 

1001 

1002 @final 

1003 @timeMethod 

1004 def _construct_quantum_graph( 

1005 self, skeleton: QuantumGraphSkeleton, metadata: Mapping[str, Any] 

1006 ) -> QuantumGraph: 

1007 """Construct a `QuantumGraph` object from the contents of a 

1008 fully-processed `quantum_graph_skeleton.QuantumGraphSkeleton`. 

1009 

1010 Parameters 

1011 ---------- 

1012 skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton` 

1013 Preliminary quantum graph. Must have "init_inputs", "inputs", and 

1014 "outputs" attributes on all quantum nodes, as added by 

1015 `_resolve_task_quanta`, as well as a "datastore_records" attribute 

1016 as added by `_attach_datastore_records`. 

1017 metadata : `Mapping` 

1018 Flexible metadata to add to the graph. 

1019 

1020 Returns 

1021 ------- 

1022 quantum_graph : `QuantumGraph` 

1023 DAG describing processing to be performed. 

1024 """ 

1025 quanta: dict[TaskDef, set[Quantum]] = {} 

1026 init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1027 init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {} 

1028 for task_def in self._pipeline_graph._iter_task_defs(): 

1029 if not skeleton.has_task(task_def.label): 

1030 continue 

1031 task_node = self._pipeline_graph.tasks[task_def.label] 

1032 task_init_key = skeleton.get_task_init_node(task_def.label) 

1033 init_inputs[task_def] = skeleton[task_init_key]["inputs"].values() 

1034 init_outputs[task_def] = skeleton[task_init_key]["outputs"].values() 

1035 quanta_for_task: set[Quantum] = set() 

1036 for quantum_key in skeleton.get_quanta(task_node.label): 

1037 node_state = skeleton[quantum_key] 

1038 quanta_for_task.add( 

1039 Quantum( 

1040 taskName=task_node.task_class_name, 

1041 taskClass=task_node.task_class, 

1042 dataId=node_state["data_id"], 

1043 initInputs=node_state["init_inputs"], 

1044 inputs=node_state["inputs"], 

1045 outputs=node_state["outputs"], 

1046 datastore_records=node_state.get("datastore_records"), 

1047 ) 

1048 ) 

1049 quanta[task_def] = quanta_for_task 

1050 

1051 registry_dataset_types: list[DatasetType] = [ 

1052 node.dataset_type for node in self._pipeline_graph.dataset_types.values() 

1053 ] 

1054 

1055 all_metadata = self.metadata.to_dict() 

1056 all_metadata.update(metadata) 

1057 return QuantumGraph( 

1058 quanta, 

1059 metadata=all_metadata, 

1060 universe=self.universe, 

1061 initInputs=init_inputs, 

1062 initOutputs=init_outputs, 

1063 globalInitOutputs=[skeleton[key]["ref"] for key in skeleton.global_init_outputs], 

1064 registryDatasetTypes=registry_dataset_types, 

1065 ) 

1066 

1067 @staticmethod 

1068 @final 

1069 def _find_removed( 

1070 original: Iterable[DatasetKey | PrerequisiteDatasetKey], 

1071 adjusted: NamedKeyMapping[DatasetType, Sequence[DatasetRef]], 

1072 ) -> set[DatasetKey | PrerequisiteDatasetKey]: 

1073 """Identify skeleton-graph dataset nodes that have been removed by 

1074 `~PipelineTaskConnections.adjustQuantum`. 

1075 

1076 Parameters 

1077 ---------- 

1078 original : `~collections.abc.Iterable` [ `DatasetKey` or \ 

1079 `PrerequisiteDatasetKey` ] 

1080 Identifiers for the dataset nodes that were the original neighbors 

1081 (inputs or outputs) of a quantum. 

1082 adjusted : `~lsst.daf.butler.NamedKeyMapping` [ \ 

1083 `~lsst.daf.butler.DatasetType`, \ 

1084 `~collections.abc.Sequence` [ `lsst.daf.butler.DatasetType` ] ] 

1085 Adjusted neighbors, in the form used by `lsst.daf.butler.Quantum`. 

1086 

1087 Returns 

1088 ------- 

1089 removed : `set` [ `DatasetKey` ] 

1090 Datasets in ``original`` that have no counterpart in ``adjusted``. 

1091 """ 

1092 result = set(original) 

1093 for dataset_type, kept_refs in adjusted.items(): 

1094 parent_dataset_type_name, _ = DatasetType.splitDatasetTypeName(dataset_type.name) 

1095 for kept_ref in kept_refs: 

1096 result.remove(DatasetKey(parent_dataset_type_name, kept_ref.dataId.values_tuple())) 

1097 return result 

1098 

1099 

@dataclasses.dataclass(eq=False, order=False)
class ExistingDatasets:
    """Struct that holds the results of dataset queries for
    `QuantumGraphBuilder`.
    """

    inputs: dict[DatasetKey | PrerequisiteDatasetKey, DatasetRef] = dataclasses.field(default_factory=dict)
    """Overall-input datasets found in `QuantumGraphBuilder.input_collections`.

    This may include prerequisite inputs.  It does include init-inputs.
    It does not include intermediates.
    """

    outputs_for_skip: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict)
    """Output datasets found in `QuantumGraphBuilder.skip_existing_in`.

    It is unspecified whether this includes init-outputs; there is
    no concept of skipping at the init stage, so this is not expected to
    matter.
    """

    outputs_in_the_way: dict[DatasetKey, DatasetRef] = dataclasses.field(default_factory=dict)
    """Output datasets found in `QuantumGraphBuilder.output_run`.

    This includes regular outputs and init-outputs.
    """

1126 

1127 

1128def _quantum_or_quanta(n: int) -> str: 

1129 """Correctly pluralize 'quantum' if needed.""" 

1130 return f"{n} quanta" if n != 1 else "1 quantum"