Coverage for python/lsst/pipe/base/pipeline_graph/_edges.py: 38% (191 statements)

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Edge", "ReadEdge", "WriteEdge")

from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import Any, ClassVar, TypeVar

from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse
from lsst.daf.butler.registry import MissingDatasetTypeError
from lsst.utils.classes import immutable

from ..connectionTypes import BaseConnection
from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
from ._nodes import NodeKey, NodeType

_S = TypeVar("_S", bound="Edge")


@immutable
class Edge(ABC):
    """Base class for edges in a pipeline graph.

    This represents the link between a task node and an input or output dataset
    type.

    Parameters
    ----------
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
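
    Examples
    --------
    Init edges are distinguished only by the node type of their task key; a
    minimal sketch using a `ReadEdge` attached to a task init node (the task
    label and dataset type name here are illustrative, not from a real
    pipeline):

    >>> from lsst.pipe.base.pipeline_graph._nodes import NodeKey, NodeType
    >>> edge = ReadEdge(
    ...     NodeKey(NodeType.DATASET_TYPE, "isr_config"),
    ...     NodeKey(NodeType.TASK_INIT, "isr"),
    ...     storage_class_name="Config",
    ...     connection_name="config",
    ...     is_calibration=False,
    ...     raw_dimensions=frozenset(),
    ...     is_prerequisite=False,
    ...     component=None,
    ...     defer_query_constraint=False,
    ... )
    >>> edge.is_init
    True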

    """

    def __init__(
        self,
        *,
        task_key: NodeKey,
        dataset_type_key: NodeKey,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
    ):
        self.task_key = task_key
        self.dataset_type_key = dataset_type_key
        self.connection_name = connection_name
        self.storage_class_name = storage_class_name
        self.is_calibration = is_calibration
        self.raw_dimensions = raw_dimensions

    INIT_TO_TASK_NAME: ClassVar[str] = "INIT"
    """Edge key for the special edge that connects a task init node to the
    task node itself (for regular edges, this would be the connection name).
    """

    task_key: NodeKey
    """Task part of the key for this edge in networkx graphs."""

    dataset_type_key: NodeKey
    """Dataset type part of the key for this edge in networkx graphs."""

    connection_name: str
    """Name used by the task to refer to this dataset type."""

    storage_class_name: str
    """Storage class expected by this task.

    If `ReadEdge.component` is not `None`, this is the component storage class,
    not the parent storage class.
    """

    is_calibration: bool
    """Whether this dataset type can be included in
    `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    """

    raw_dimensions: frozenset[str]
    """Raw dimensions in the task declaration.

    This can only be used safely for partial comparisons: two edges with the
    same ``raw_dimensions`` (and the same parent dataset type name) always have
    the same resolved dimensions, but edges with different ``raw_dimensions``
    may also have the same resolved dimensions.
    """

    @property
    def is_init(self) -> bool:
        """Whether this dataset is read or written when the task is
        constructed, not when it is run.
        """
        return self.task_key.node_type is NodeType.TASK_INIT

    @property
    def task_label(self) -> str:
        """Label of the task."""
        return str(self.task_key)

    @property
    def parent_dataset_type_name(self) -> str:
        """Name of the parent dataset type.

        All dataset type nodes in a pipeline graph are for parent dataset
        types; components are represented by additional `ReadEdge` state.
        """
        return str(self.dataset_type_key)

    @property
    @abstractmethod
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        """The directed pair of `NodeKey` instances this edge connects.

        This tuple is ordered in the same direction as the pipeline flow:
        `task_key` precedes `dataset_type_key` for writes, and the reverse is
        true for reads.
        """
        raise NotImplementedError()

    @property
    def key(self) -> tuple[NodeKey, NodeKey, str]:
        """Ordered tuple of node keys and connection name that uniquely
        identifies this edge in a pipeline graph.
        """
        return self.nodes + (self.connection_name,)

    def __repr__(self) -> str:
        return f"{self.nodes[0]} -> {self.nodes[1]} ({self.connection_name})"

    @property
    def dataset_type_name(self) -> str:
        """Dataset type name seen by the task.

        This defaults to the parent dataset type name, which is appropriate
        for all writes and most reads.
        """
        return self.parent_dataset_type_name

    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
        """Compare this edge to another one from a possibly-different
        configuration of the same task label.

        Parameters
        ----------
        other : `Edge`
            Another edge of the same type to compare to.
        connection_type : `str`
            Human-readable name of the connection type of this edge (e.g.
            "init input", "output") for use in returned messages.

        Returns
        -------
        differences : `list` [ `str` ]
            List of string messages describing differences between ``self`` and
            ``other``. Will be empty if ``self == other`` or if the only
            difference is in the task label or connection name (which are not
            checked). Messages will use 'A' to refer to ``self`` and 'B' to
            refer to ``other``.
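
        Examples
        --------
        A minimal sketch comparing two init-input edges that differ only in
        storage class (the node keys, names, and storage classes here are
        illustrative):

        >>> from lsst.pipe.base.pipeline_graph._nodes import NodeKey, NodeType
        >>> def make_edge(storage_class_name):
        ...     return ReadEdge(
        ...         NodeKey(NodeType.DATASET_TYPE, "isr_config"),
        ...         NodeKey(NodeType.TASK_INIT, "isr"),
        ...         storage_class_name=storage_class_name,
        ...         connection_name="config",
        ...         is_calibration=False,
        ...         raw_dimensions=frozenset(),
        ...         is_prerequisite=False,
        ...         component=None,
        ...         defer_query_constraint=False,
        ...     )
        >>> make_edge("Config").diff(make_edge("Packages"), "init input")
        ["Init input 'config' has storage class 'Config' in A, but 'Packages' in B."]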

        """
        result = []
        if self.dataset_type_name != other.dataset_type_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has dataset type "
                f"{self.dataset_type_name!r} in A, but {other.dataset_type_name!r} in B."
            )
        if self.storage_class_name != other.storage_class_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has storage class "
                f"{self.storage_class_name!r} in A, but {other.storage_class_name!r} in B."
            )
        if self.raw_dimensions != other.raw_dimensions:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has raw dimensions "
                f"{set(self.raw_dimensions)} in A, but {set(other.raw_dimensions)} in B "
                "(differences in raw dimensions may not lead to differences in resolved dimensions, "
                "but this cannot be checked without re-resolving the dataset type)."
            )
        if self.is_calibration != other.is_calibration:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a calibration "
                f"{'in A but not in B' if self.is_calibration else 'in B but not in A'}."
            )
        return result

    @abstractmethod
    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        """Transform the graph's definition of a dataset type (parent, with the
        registry or producer's storage class) to the one seen by this task.

        Parameters
        ----------
        dataset_type : `~lsst.daf.butler.DatasetType`
            Graph's definition of the dataset type.

        Returns
        -------
        out_dataset_type : `~lsst.daf.butler.DatasetType`
            Dataset type seen by this task.
        """
        raise NotImplementedError()

    @abstractmethod
    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        """Transform the graph's definition of a dataset reference (parent
        dataset type, with the registry or producer's storage class) to the one
        seen by this task.

        Parameters
        ----------
        ref : `~lsst.daf.butler.DatasetRef`
            Graph's definition of the dataset reference.

        Returns
        -------
        out_dataset_ref : `~lsst.daf.butler.DatasetRef`
            Dataset reference seen by this task.
        """
        raise NotImplementedError()

    def _to_xgraph_state(self) -> dict[str, Any]:
        """Convert this edge's attributes into a dictionary suitable for use
        in exported networkx graphs.
        """
        return {
            "parent_dataset_type_name": self.parent_dataset_type_name,
            "storage_class_name": self.storage_class_name,
            "is_init": self.is_init,
        }


class ReadEdge(Edge):
    """Representation of an input connection (including init-inputs and
    prerequisites) in a pipeline graph.

    Parameters
    ----------
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to. This should
        hold the parent dataset type name for component dataset types.
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    is_prerequisite : `bool`
        Whether this dataset must be present in the data repository prior to
        `QuantumGraph` generation.
    component : `str` or `None`
        Component of the dataset type requested by the task.
    defer_query_constraint : `bool`
        If `True`, by default do not include this dataset type's existence as a
        constraint on the initial data ID query in QuantumGraph generation.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), read edges set the following edge attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``
    - ``component``
    - ``is_prerequisite``

    As with `ReadEdge` instance attributes, these descriptions of dataset types
    are those specific to a task, and may differ from the graph's resolved
    dataset type; if `PipelineGraph.resolve` has not been called, there may
    not even be a consistent graph-wide definition of the dataset type.
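
    Examples
    --------
    Component inputs are stored as a parent dataset type node plus the
    `component` attribute on this edge; the split is the same one performed by
    `~lsst.daf.butler.DatasetType.splitDatasetTypeName` (the dataset type name
    below is illustrative):

    >>> from lsst.daf.butler import DatasetType
    >>> DatasetType.splitDatasetTypeName("calexp.wcs")
    ('calexp', 'wcs')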

    """

    def __init__(
        self,
        dataset_type_key: NodeKey,
        task_key: NodeKey,
        *,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
        is_prerequisite: bool,
        component: str | None,
        defer_query_constraint: bool,
    ):
        super().__init__(
            task_key=task_key,
            dataset_type_key=dataset_type_key,
            storage_class_name=storage_class_name,
            connection_name=connection_name,
            raw_dimensions=raw_dimensions,
            is_calibration=is_calibration,
        )
        self.is_prerequisite = is_prerequisite
        self.component = component
        self.defer_query_constraint = defer_query_constraint

    component: str | None
    """Component to add to `parent_dataset_type_name` to form the dataset type
    name seen by this task.
    """

    is_prerequisite: bool
    """Whether this dataset must be present in the data repository prior to
    `QuantumGraph` generation.
    """

    defer_query_constraint: bool
    """If `True`, by default do not include this dataset type's existence as a
    constraint on the initial data ID query in QuantumGraph generation.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.dataset_type_key, self.task_key)

    @property
    def dataset_type_name(self) -> str:
        """Complete dataset type name, as seen by the task."""
        if self.component is not None:
            return f"{self.parent_dataset_type_name}.{self.component}"
        return self.parent_dataset_type_name

    def diff(self: ReadEdge, other: ReadEdge, connection_type: str = "connection") -> list[str]:
        # Docstring inherited.
        result = super().diff(other, connection_type)
        if self.defer_query_constraint != other.defer_query_constraint:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a deferred query "
                f"constraint {'in A but not in B' if self.defer_query_constraint else 'in B but not in A'}."
            )
        return result

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == dataset_type.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return dataset_type.makeComponentDatasetType(self.component)
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == ref.datasetType.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return ref.makeComponentRef(self.component)
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
        is_prerequisite: bool = False,
    ) -> ReadEdge:
        """Construct a `ReadEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.
        is_prerequisite : `bool`, optional
            Whether this dataset must be present in the data repository prior
            to `QuantumGraph` generation.

        Returns
        -------
        edge : `ReadEdge`
            New edge instance.
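
        Examples
        --------
        A minimal sketch using `types.SimpleNamespace` as a stand-in that
        provides only the attributes this method reads (a real
        `.BaseConnection` would normally be used; the names are illustrative,
        and a ``NodeType.TASK`` member is assumed to exist alongside the
        ``TASK_INIT`` and ``DATASET_TYPE`` members used elsewhere in this
        module):

        >>> from types import SimpleNamespace
        >>> from lsst.pipe.base.pipeline_graph._nodes import NodeKey, NodeType
        >>> conn = SimpleNamespace(name="calexp.wcs", storageClass="Wcs")
        >>> edge = ReadEdge._from_connection_map(
        ...     NodeKey(NodeType.TASK, "astrometry"), "wcs", {"wcs": conn}
        ... )
        >>> edge.component, edge.connection_name
        ('wcs', 'wcs')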

        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        return cls(
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            task_key=task_key,
            component=component,
            storage_class_name=connection.storageClass,
            # InitInput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            is_prerequisite=is_prerequisite,
            connection_name=connection_name,
            # InitInput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
            # PrerequisiteInput and InitInput connections don't have a
            # .deferGraphConstraint, because they never constrain the initial
            # data ID query.
            defer_query_constraint=getattr(connection, "deferGraphConstraint", False),
        )

    def _resolve_dataset_type(
        self,
        *,
        current: DatasetType | None,
        is_initial_query_constraint: bool,
        is_prerequisite: bool | None,
        universe: DimensionUniverse,
        producer: str | None,
        consumers: Sequence[str],
        is_registered: bool,
    ) -> tuple[DatasetType, bool, bool]:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists. If not, it will be the dataset type definition from the
            task in the graph that writes it, if there is one. If there is no
            such task, this will be `None`.
        is_initial_query_constraint : `bool`
            Whether this dataset type is currently marked as a constraint on
            the initial data ID query in QuantumGraph generation.
        is_prerequisite : `bool` or `None`
            Whether this dataset type is marked as a prerequisite input in all
            edges processed so far. `None` if this is the first edge.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.
        producer : `str` or `None`
            The label of the task that produces this dataset type in the
            pipeline, or `None` if it is an overall input.
        consumers : `Sequence` [ `str` ]
            Labels for other consuming tasks that have already participated in
            this dataset type's resolution.
        is_registered : `bool`
            Whether a registration for this dataset type was found in the
            data repository.

        Returns
        -------
        dataset_type : `DatasetType`
            The updated graph-wide dataset type. If ``current`` was provided,
            this must be equal to it.
        is_initial_query_constraint : `bool`
            If `True`, this dataset type should be included as a constraint in
            the initial data ID query during QuantumGraph generation; this
            requires that ``is_initial_query_constraint`` also be `True` on
            input.
        is_prerequisite : `bool`
            Whether this dataset type is marked as a prerequisite input in this
            task and all other edges processed so far.

        Raises
        ------
        MissingDatasetTypeError
            Raised if ``current is None`` and this edge cannot define one on
            its own.
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is not
            compatible with it.
        ConnectionTypeConsistencyError
            Raised if a prerequisite input for one task appears as a different
            kind of connection in any other task.
        """
        if "skypix" in self.raw_dimensions:
            if current is None:
                raise MissingDatasetTypeError(
                    f"DatasetType '{self.dataset_type_name}' referenced by "
                    f"{self.task_label!r} uses 'skypix' as a dimension "
                    f"placeholder, but has not been registered with the data repository. "
                    f"Note that reference catalog names are now used as the dataset "
                    f"type name instead of 'ref_cat'."
                )
            rest1 = set(universe.conform(self.raw_dimensions - {"skypix"}).names)
            rest2 = current.dimensions.names - current.dimensions.skypix.names
            if rest1 != rest2:
                raise IncompatibleDatasetTypeError(
                    f"Non-skypix dimensions for dataset type {self.dataset_type_name} declared in "
                    f"connections ({rest1}) are inconsistent with those in "
                    f"registry's version of this dataset ({rest2})."
                )
            dimensions = current.dimensions.as_group()
        else:
            dimensions = universe.conform(self.raw_dimensions)
        is_initial_query_constraint = is_initial_query_constraint and not self.defer_query_constraint
        if is_prerequisite is None:
            is_prerequisite = self.is_prerequisite
        elif is_prerequisite and not self.is_prerequisite:
            raise ConnectionTypeConsistencyError(
                f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to {consumers}, "
                f"but it is not a prerequisite to {self.task_label!r}."
            )
        elif not is_prerequisite and self.is_prerequisite:
            if producer is not None:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label!r}, but it is produced by {producer!r}."
                )
            else:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label!r}, but it is a regular input to {consumers!r}."
                )

        def report_current_origin() -> str:
            if is_registered:
                return "data repository"
            elif producer is not None:
                return f"producing task {producer!r}"
            else:
                return f"consuming task(s) {consumers!r}"

        if self.component is not None:
            if current is None:
                raise MissingDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} is not registered and not produced by "
                    f"this pipeline, but it is used by task {self.task_label!r}, via component "
                    f"{self.component!r}. This pipeline cannot be resolved until the parent dataset type is "
                    "registered."
                )
            all_current_components = current.storageClass.allComponents()
            if self.component not in all_current_components:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} has storage class "
                    f"{current.storageClass_name!r} (from {report_current_origin()}), "
                    f"which does not include component {self.component!r} "
                    f"as requested by task {self.task_label!r}."
                )
            if all_current_components[self.component].name != self.storage_class_name:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type '{self.parent_dataset_type_name}.{self.component}' has storage class "
                    f"{all_current_components[self.component].name!r} "
                    f"(from {report_current_origin()}), which does not match "
                    f"{self.storage_class_name!r}, as requested by task {self.task_label!r}. "
                    "Note that storage class conversions of components are not supported."
                )
            return current, is_initial_query_constraint, is_prerequisite
        else:
            dataset_type = DatasetType(
                self.parent_dataset_type_name,
                dimensions,
                storageClass=self.storage_class_name,
                isCalibration=self.is_calibration,
            )
            if current is not None:
                if not is_registered and producer is None:
                    # Current definition comes from another consumer; we
                    # require the dataset types to be exactly equal (not just
                    # compatible), since neither connection should take
                    # precedence.
                    if dataset_type != current:
                        raise MissingDatasetTypeError(
                            f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
                            f"task {self.task_label!r} has {dataset_type}, but the definition "
                            f"from {report_current_origin()} is {current}. If the storage classes are "
                            "compatible but different, registering the dataset type in the data repository "
                            "in advance will avoid this error."
                        )
                elif not dataset_type.is_compatible_with(current):
                    raise IncompatibleDatasetTypeError(
                        f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
                        f"task {self.task_label!r} has {dataset_type}, but the definition "
                        f"from {report_current_origin()} is {current}."
                    )
                return current, is_initial_query_constraint, is_prerequisite
            else:
                return dataset_type, is_initial_query_constraint, is_prerequisite

    def _to_xgraph_state(self) -> dict[str, Any]:
        # Docstring inherited.
        result = super()._to_xgraph_state()
        result["component"] = self.component
        result["is_prerequisite"] = self.is_prerequisite
        return result


class WriteEdge(Edge):
    """Representation of an output connection (including init-outputs) in a
    pipeline graph.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), write edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``

    As with `WriteEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type; if `PipelineGraph.resolve` has not been called,
    there may not even be a consistent graph-wide definition of the dataset
    type.
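
    Examples
    --------
    Write edges point from the task to the dataset type, so `nodes` is ordered
    ``(task_key, dataset_type_key)``; a minimal sketch for an init-output (the
    task label and names here are illustrative):

    >>> from lsst.pipe.base.pipeline_graph._nodes import NodeKey, NodeType
    >>> w = WriteEdge(
    ...     task_key=NodeKey(NodeType.TASK_INIT, "isr"),
    ...     dataset_type_key=NodeKey(NodeType.DATASET_TYPE, "isr_config"),
    ...     storage_class_name="Config",
    ...     connection_name="config",
    ...     is_calibration=False,
    ...     raw_dimensions=frozenset(),
    ... )
    >>> w.nodes == (w.task_key, w.dataset_type_key)
    True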

    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.task_key, self.dataset_type_key)

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
    ) -> WriteEdge:
        """Construct a `WriteEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.

        Returns
        -------
        edge : `WriteEdge`
            New edge instance.
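
        Examples
        --------
        Output connections may not name components; a sketch with a stand-in
        connection object in place of a real `.BaseConnection` (the names are
        illustrative):

        >>> from types import SimpleNamespace
        >>> from lsst.pipe.base.pipeline_graph._nodes import NodeKey, NodeType
        >>> conn = SimpleNamespace(name="calexp.wcs", storageClass="Wcs")
        >>> WriteEdge._from_connection_map(
        ...     NodeKey(NodeType.TASK_INIT, "isr"), "out", {"out": conn}
        ... )
        Traceback (most recent call last):
        ...
        ValueError: Illegal output component dataset 'calexp.wcs' in task 'isr'.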

        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        if component is not None:
            raise ValueError(
                f"Illegal output component dataset {connection.name!r} in task {task_key.name!r}."
            )
        return cls(
            task_key=task_key,
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            storage_class_name=connection.storageClass,
            connection_name=connection_name,
            # InitOutput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            # InitOutput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
        )

    def _resolve_dataset_type(self, current: DatasetType | None, universe: DimensionUniverse) -> DatasetType:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.

        Returns
        -------
        dataset_type : `DatasetType`
            A dataset type compatible with this edge. If ``current`` was
            provided, this must be equal to it.

        Raises
        ------
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is
            not compatible with it.
        """
        dimensions = universe.conform(self.raw_dimensions)
        dataset_type = DatasetType(
            self.parent_dataset_type_name,
            dimensions,
            storageClass=self.storage_class_name,
            isCalibration=self.is_calibration,
        )
        if current is not None:
            if not current.is_compatible_with(dataset_type):
                raise IncompatibleDatasetTypeError(
                    f"Incompatible definition for output dataset type {self.parent_dataset_type_name!r}: "
                    f"task {self.task_label!r} has {dataset_type}, but data repository has {current}."
                )
            return current
        else:
            return dataset_type