Coverage for python/lsst/pipe/base/pipeline_graph/_edges.py: 38%

191 statements  


# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Edge", "ReadEdge", "WriteEdge")

from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import Any, ClassVar, TypeVar

from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse
from lsst.daf.butler.registry import MissingDatasetTypeError
from lsst.utils.classes import immutable

from ..connectionTypes import BaseConnection
from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
from ._nodes import NodeKey, NodeType

_S = TypeVar("_S", bound="Edge")


@immutable
class Edge(ABC):
    """Base class for edges in a pipeline graph.

    This represents the link between a task node and an input or output
    dataset type.

    Parameters
    ----------
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    """

    def __init__(
        self,
        *,
        task_key: NodeKey,
        dataset_type_key: NodeKey,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
    ):
        self.task_key = task_key
        self.dataset_type_key = dataset_type_key
        self.connection_name = connection_name
        self.storage_class_name = storage_class_name
        self.is_calibration = is_calibration
        self.raw_dimensions = raw_dimensions

    INIT_TO_TASK_NAME: ClassVar[str] = "INIT"
    """Edge key for the special edge that connects a task init node to the
    task node itself (for regular edges, this would be the connection name).
    """

    task_key: NodeKey
    """Task part of the key for this edge in networkx graphs."""

    dataset_type_key: NodeKey
    """Dataset type part of the key for this edge in networkx graphs."""

    connection_name: str
    """Name used by the task to refer to this dataset type."""

    storage_class_name: str
    """Storage class expected by this task.

    If `ReadEdge.component` is not `None`, this is the component storage
    class, not the parent storage class.
    """

    is_calibration: bool
    """Whether this dataset type can be included in
    `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    """

    raw_dimensions: frozenset[str]
    """Raw dimensions in the task declaration.

    This can only be used safely for partial comparisons: two edges with the
    same ``raw_dimensions`` (and the same parent dataset type name) always
    have the same resolved dimensions, but edges with different
    ``raw_dimensions`` may also have the same resolved dimensions.
    """

    @property
    def is_init(self) -> bool:
        """Whether this dataset is read or written when the task is
        constructed, not when it is run.
        """
        return self.task_key.node_type is NodeType.TASK_INIT

    @property
    def task_label(self) -> str:
        """Label of the task."""
        return str(self.task_key)

    @property
    def parent_dataset_type_name(self) -> str:
        """Name of the parent dataset type.

        All dataset type nodes in a pipeline graph are for parent dataset
        types; components are represented by additional `ReadEdge` state.
        """
        return str(self.dataset_type_key)

    @property
    @abstractmethod
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        """The directed pair of `NodeKey` instances this edge connects.

        This tuple is ordered in the same direction as the pipeline flow:
        `task_key` precedes `dataset_type_key` for writes, and the reverse is
        true for reads.
        """
        raise NotImplementedError()

    @property
    def key(self) -> tuple[NodeKey, NodeKey, str]:
        """Ordered tuple of node keys and connection name that uniquely
        identifies this edge in a pipeline graph.
        """
        return self.nodes + (self.connection_name,)

    def __repr__(self) -> str:
        return f"{self.nodes[0]} -> {self.nodes[1]} ({self.connection_name})"
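
    # Illustrative sketch of how `nodes` and `key` are ordered (the "calexp"
    # and "measure" names and `NodeType.TASK` are assumptions for this
    # example, not values used in this module):
    #
    #     dataset_type_key = NodeKey(NodeType.DATASET_TYPE, "calexp")
    #     task_key = NodeKey(NodeType.TASK, "measure")
    #     # A read edge is directed dataset type -> task:
    #     #     read_edge.nodes == (dataset_type_key, task_key)
    #     # A write edge is directed task -> dataset type, and `key` appends
    #     # the connection name to `nodes` in either case.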

    @property
    def dataset_type_name(self) -> str:
        """Dataset type name seen by the task.

        This defaults to the parent dataset type name, which is appropriate
        for all writes and most reads.
        """
        return self.parent_dataset_type_name

    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
        """Compare this edge to another one from a possibly-different
        configuration of the same task label.

        Parameters
        ----------
        other : `Edge`
            Another edge of the same type to compare to.
        connection_type : `str`
            Human-readable name of the connection type of this edge (e.g.
            "init input", "output") for use in returned messages.

        Returns
        -------
        differences : `list` [ `str` ]
            List of string messages describing differences between ``self``
            and ``other``. Will be empty if ``self == other`` or if the only
            difference is in the task label or connection name (which are not
            checked). Messages will use 'A' to refer to ``self`` and 'B' to
            refer to ``other``.
        """
        result = []
        if self.dataset_type_name != other.dataset_type_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has dataset type "
                f"{self.dataset_type_name!r} in A, but {other.dataset_type_name!r} in B."
            )
        if self.storage_class_name != other.storage_class_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has storage class "
                f"{self.storage_class_name!r} in A, but {other.storage_class_name!r} in B."
            )
        if self.raw_dimensions != other.raw_dimensions:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has raw dimensions "
                f"{set(self.raw_dimensions)} in A, but {set(other.raw_dimensions)} in B "
                "(differences in raw dimensions may not lead to differences in resolved dimensions, "
                "but this cannot be checked without re-resolving the dataset type)."
            )
        if self.is_calibration != other.is_calibration:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a calibration "
                f"{'in A but not in B' if self.is_calibration else 'in B but not in A'}."
            )
        return result
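
    # A sketch of a `diff` result (connection and storage class names here
    # are hypothetical): for two input edges that agree except on storage
    # class,
    #
    #     a.diff(b, "input")
    #
    # would return a single message like
    # "Input 'wcs' has storage class 'Wcs' in A, but 'SkyWcs' in B."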

    @abstractmethod
    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        """Transform the graph's definition of a dataset type (parent, with
        the registry or producer's storage class) to the one seen by this
        task.
        """
        raise NotImplementedError()

    @abstractmethod
    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        """Transform the graph's definition of a dataset reference (parent
        dataset type, with the registry or producer's storage class) to the
        one seen by this task.
        """
        raise NotImplementedError()

    def _to_xgraph_state(self) -> dict[str, Any]:
        """Convert this edge's attributes into a dictionary suitable for use
        in exported networkx graphs.
        """
        return {
            "parent_dataset_type_name": self.parent_dataset_type_name,
            "storage_class_name": self.storage_class_name,
            "is_init": self.is_init,
        }
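
# A sketch of how these attributes appear in an exported networkx graph,
# assuming a resolved `PipelineGraph` instance named `pipeline_graph`
# (`make_xgraph` is referenced in the docstrings below):
#
#     xgraph = pipeline_graph.make_xgraph()
#     for _, _, state in xgraph.edges(data=True):
#         state["parent_dataset_type_name"]  # `str`
#         state["storage_class_name"]        # `str`
#         state["is_init"]                   # `bool`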

class ReadEdge(Edge):
    """Representation of an input connection (including init-inputs and
    prerequisites) in a pipeline graph.

    Parameters
    ----------
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to. This should
        hold the parent dataset type name for component dataset types.
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    is_prerequisite : `bool`
        Whether this dataset must be present in the data repository prior to
        `QuantumGraph` generation.
    component : `str` or `None`
        Component of the dataset type requested by the task.
    defer_query_constraint : `bool`
        If `True`, by default do not include this dataset type's existence as
        a constraint on the initial data ID query in QuantumGraph generation.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), read edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``
    - ``component``
    - ``is_prerequisite``

    As with `ReadEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type; if `PipelineGraph.resolve` has not been called,
    there may not even be a consistent graph-wide definition of the dataset
    type.
    """

    def __init__(
        self,
        dataset_type_key: NodeKey,
        task_key: NodeKey,
        *,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
        is_prerequisite: bool,
        component: str | None,
        defer_query_constraint: bool,
    ):
        super().__init__(
            task_key=task_key,
            dataset_type_key=dataset_type_key,
            storage_class_name=storage_class_name,
            connection_name=connection_name,
            raw_dimensions=raw_dimensions,
            is_calibration=is_calibration,
        )
        self.is_prerequisite = is_prerequisite
        self.component = component
        self.defer_query_constraint = defer_query_constraint

    component: str | None
    """Component to add to `parent_dataset_type_name` to form the dataset
    type name seen by this task.
    """

    is_prerequisite: bool
    """Whether this dataset must be present in the data repository prior to
    `QuantumGraph` generation.
    """

    defer_query_constraint: bool
    """If `True`, by default do not include this dataset type's existence as
    a constraint on the initial data ID query in QuantumGraph generation.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.dataset_type_key, self.task_key)

    @property
    def dataset_type_name(self) -> str:
        """Complete dataset type name, as seen by the task."""
        if self.component is not None:
            return f"{self.parent_dataset_type_name}.{self.component}"
        return self.parent_dataset_type_name
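
    # For example (illustrative values, not from this module):
    #
    #     edge.parent_dataset_type_name  # "calexp"
    #     edge.component                 # "wcs"
    #     edge.dataset_type_name         # "calexp.wcs"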

    def diff(self: ReadEdge, other: ReadEdge, connection_type: str = "connection") -> list[str]:
        # Docstring inherited.
        result = super().diff(other, connection_type)
        if self.defer_query_constraint != other.defer_query_constraint:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a deferred query "
                f"constraint {'in A but not in B' if self.defer_query_constraint else 'in B but not in A'}."
            )
        return result

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == dataset_type.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return dataset_type.makeComponentDatasetType(self.component)
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == ref.datasetType.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return ref.makeComponentRef(self.component)
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref
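
    # A sketch of both adapters for a component read edge; the dataset type
    # name is hypothetical, and "Wcs" is assumed to be the storage class of
    # the "wcs" component of "ExposureF":
    #
    #     universe = DimensionUniverse()
    #     parent = DatasetType(
    #         "calexp",
    #         universe.conform({"instrument", "visit", "detector"}),
    #         storageClass="ExposureF",
    #     )
    #     edge.adapt_dataset_type(parent)  # DatasetType for "calexp.wcs"
    #     edge.adapt_dataset_ref(ref)      # same as ref.makeComponentRef("wcs")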

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
        is_prerequisite: bool = False,
    ) -> ReadEdge:
        """Construct a `ReadEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.
        is_prerequisite : `bool`, optional
            Whether this dataset must be present in the data repository prior
            to `QuantumGraph` generation.

        Returns
        -------
        edge : `ReadEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        return cls(
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            task_key=task_key,
            component=component,
            storage_class_name=connection.storageClass,
            # InitInput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            is_prerequisite=is_prerequisite,
            connection_name=connection_name,
            # InitInput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
            # PrerequisiteInput and InitInput connections don't have a
            # .deferGraphConstraint, because they never constrain the initial
            # data ID query.
            defer_query_constraint=getattr(connection, "deferGraphConstraint", False),
        )
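
    # A sketch of typical use, with a hypothetical connection (`cT` is
    # assumed to be an alias for `lsst.pipe.base.connectionTypes`):
    #
    #     connection = cT.Input(
    #         doc="WCS of a calibrated exposure.",
    #         name="calexp.wcs",
    #         storageClass="Wcs",
    #         dimensions={"instrument", "visit", "detector"},
    #     )
    #     edge = ReadEdge._from_connection_map(task_key, "wcs", {"wcs": connection})
    #     edge.parent_dataset_type_name, edge.component  # ("calexp", "wcs")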

    def _resolve_dataset_type(
        self,
        *,
        current: DatasetType | None,
        is_initial_query_constraint: bool,
        is_prerequisite: bool | None,
        universe: DimensionUniverse,
        producer: str | None,
        consumers: Sequence[str],
        is_registered: bool,
    ) -> tuple[DatasetType, bool, bool]:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists. If not, it will be the dataset type definition from the
            task in the graph that writes it, if there is one. If there is no
            such task, this will be `None`.
        is_initial_query_constraint : `bool`
            Whether this dataset type is currently marked as a constraint on
            the initial data ID query in QuantumGraph generation.
        is_prerequisite : `bool` or `None`
            Whether this dataset type is marked as a prerequisite input in all
            edges processed so far. `None` if this is the first edge.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.
        producer : `str` or `None`
            The label of the task that produces this dataset type in the
            pipeline, or `None` if it is an overall input.
        consumers : `Sequence` [ `str` ]
            Labels for other consuming tasks that have already participated in
            this dataset type's resolution.
        is_registered : `bool`
            Whether a registration for this dataset type was found in the
            data repository.

        Returns
        -------
        dataset_type : `DatasetType`
            The updated graph-wide dataset type. If ``current`` was provided,
            this must be equal to it.
        is_initial_query_constraint : `bool`
            If `True`, this dataset type should be included as a constraint in
            the initial data ID query during QuantumGraph generation; this
            requires that ``is_initial_query_constraint`` also be `True` on
            input.
        is_prerequisite : `bool`
            Whether this dataset type is marked as a prerequisite input in
            this task and all other edges processed so far.

        Raises
        ------
        MissingDatasetTypeError
            Raised if ``current is None`` and this edge cannot define one on
            its own.
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is
            not compatible with it.
        ConnectionTypeConsistencyError
            Raised if a prerequisite input for one task appears as a different
            kind of connection in any other task.
        """
        if "skypix" in self.raw_dimensions:
            if current is None:
                raise MissingDatasetTypeError(
                    f"DatasetType '{self.dataset_type_name}' referenced by "
                    f"{self.task_label!r} uses 'skypix' as a dimension "
                    f"placeholder, but has not been registered with the data repository. "
                    f"Note that reference catalog names are now used as the dataset "
                    f"type name instead of 'ref_cat'."
                )
            rest1 = set(universe.conform(self.raw_dimensions - {"skypix"}).names)
            rest2 = current.dimensions.names - current.dimensions.skypix.names
            if rest1 != rest2:
                raise IncompatibleDatasetTypeError(
                    f"Non-skypix dimensions for dataset type {self.dataset_type_name} declared in "
                    f"connections ({rest1}) are inconsistent with those in "
                    f"registry's version of this dataset ({rest2})."
                )
            dimensions = current.dimensions.as_group()
        else:
            dimensions = universe.conform(self.raw_dimensions)
        is_initial_query_constraint = is_initial_query_constraint and not self.defer_query_constraint
        if is_prerequisite is None:
            is_prerequisite = self.is_prerequisite
        elif is_prerequisite and not self.is_prerequisite:
            raise ConnectionTypeConsistencyError(
                f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to {consumers}, "
                f"but it is not a prerequisite to {self.task_label!r}."
            )
        elif not is_prerequisite and self.is_prerequisite:
            if producer is not None:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label}, but it is produced by {producer!r}."
                )
            else:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label}, but it is a regular input to {consumers!r}."
                )

        def report_current_origin() -> str:
            if is_registered:
                return "data repository"
            elif producer is not None:
                return f"producing task {producer!r}"
            else:
                return f"consuming task(s) {consumers!r}"

        if self.component is not None:
            if current is None:
                raise MissingDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} is not registered and not produced by "
                    f"this pipeline, but it is used by task {self.task_label!r}, via component "
                    f"{self.component!r}. This pipeline cannot be resolved until the parent dataset type "
                    "is registered."
                )
            all_current_components = current.storageClass.allComponents()
            if self.component not in all_current_components:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} has storage class "
                    f"{current.storageClass_name!r} (from {report_current_origin()}), "
                    f"which does not include component {self.component!r} "
                    f"as requested by task {self.task_label!r}."
                )
            if all_current_components[self.component].name != self.storage_class_name:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type '{self.parent_dataset_type_name}.{self.component}' has storage class "
                    f"{all_current_components[self.component].name!r} "
                    f"(from {report_current_origin()}), which does not match "
                    f"{self.storage_class_name!r}, as requested by task {self.task_label!r}. "
                    "Note that storage class conversions of components are not supported."
                )
            return current, is_initial_query_constraint, is_prerequisite
        else:
            dataset_type = DatasetType(
                self.parent_dataset_type_name,
                dimensions,
                storageClass=self.storage_class_name,
                isCalibration=self.is_calibration,
            )
            if current is not None:
                if not is_registered and producer is None:
                    # Current definition comes from another consumer; we
                    # require the dataset types to be exactly equal (not just
                    # compatible), since neither connection should take
                    # precedence.
                    if dataset_type != current:
                        raise MissingDatasetTypeError(
                            f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
                            f"task {self.task_label!r} has {dataset_type}, but the definition "
                            f"from {report_current_origin()} is {current}. If the storage classes are "
                            "compatible but different, registering the dataset type in the data repository "
                            "in advance will avoid this error."
                        )
                elif not dataset_type.is_compatible_with(current):
                    raise IncompatibleDatasetTypeError(
                        f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
                        f"task {self.task_label!r} has {dataset_type}, but the definition "
                        f"from {report_current_origin()} is {current}."
                    )
                return current, is_initial_query_constraint, is_prerequisite
            else:
                return dataset_type, is_initial_query_constraint, is_prerequisite

    def _to_xgraph_state(self) -> dict[str, Any]:
        # Docstring inherited.
        result = super()._to_xgraph_state()
        result["component"] = self.component
        result["is_prerequisite"] = self.is_prerequisite
        return result


class WriteEdge(Edge):
    """Representation of an output connection (including init-outputs) in a
    pipeline graph.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), write edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``

    As with `WriteEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type; if `PipelineGraph.resolve` has not been called,
    there may not even be a consistent graph-wide definition of the dataset
    type.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.task_key, self.dataset_type_key)

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
    ) -> WriteEdge:
        """Construct a `WriteEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.

        Returns
        -------
        edge : `WriteEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        if component is not None:
            raise ValueError(
                f"Illegal output component dataset {connection.name!r} in task {task_key.name!r}."
            )
        return cls(
            task_key=task_key,
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            storage_class_name=connection.storageClass,
            connection_name=connection_name,
            # InitOutput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            # InitOutput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
        )

    def _resolve_dataset_type(self, current: DatasetType | None, universe: DimensionUniverse) -> DatasetType:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.

        Returns
        -------
        dataset_type : `DatasetType`
            A dataset type compatible with this edge. If ``current`` was
            provided, this must be equal to it.

        Raises
        ------
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is
            not compatible with it.
        """
        dimensions = universe.conform(self.raw_dimensions)
        dataset_type = DatasetType(
            self.parent_dataset_type_name,
            dimensions,
            storageClass=self.storage_class_name,
            isCalibration=self.is_calibration,
        )
        if current is not None:
            if not current.is_compatible_with(dataset_type):
                raise IncompatibleDatasetTypeError(
                    f"Incompatible definition for output dataset type {self.parent_dataset_type_name!r}: "
                    f"task {self.task_label!r} has {dataset_type}, but data repository has {current}."
                )
            return current
        else:
            return dataset_type
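
# A sketch of `WriteEdge._resolve_dataset_type` behavior (illustrative; this
# private method is normally invoked during `PipelineGraph.resolve`):
#
#     universe = DimensionUniverse()
#     resolved = write_edge._resolve_dataset_type(current=None, universe=universe)
#
# With ``current=None`` the edge's own definition is returned; a compatible
# registry definition is returned unchanged as ``current``; an incompatible
# one raises `IncompatibleDatasetTypeError`.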