Coverage for python/lsst/pipe/base/pipeline_graph/_edges.py: 37% (191 statements)


# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Edge", "ReadEdge", "WriteEdge")

from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import Any, ClassVar, TypeVar

from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, SkyPixDimension
from lsst.daf.butler.registry import MissingDatasetTypeError
from lsst.utils.classes import immutable

from ..connectionTypes import BaseConnection
from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
from ._nodes import NodeKey, NodeType

_S = TypeVar("_S", bound="Edge")

@immutable
class Edge(ABC):
    """Base class for edges in a pipeline graph.

    This represents the link between a task node and an input or output
    dataset type.

    Parameters
    ----------
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    """

    def __init__(
        self,
        *,
        task_key: NodeKey,
        dataset_type_key: NodeKey,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
    ):
        self.task_key = task_key
        self.dataset_type_key = dataset_type_key
        self.connection_name = connection_name
        self.storage_class_name = storage_class_name
        self.is_calibration = is_calibration
        self.raw_dimensions = raw_dimensions

    INIT_TO_TASK_NAME: ClassVar[str] = "INIT"
    """Edge key for the special edge that connects a task init node to the
    task node itself (for regular edges, this would be the connection name).
    """

    task_key: NodeKey
    """Task part of the key for this edge in networkx graphs."""

    dataset_type_key: NodeKey
    """Dataset type part of the key for this edge in networkx graphs."""

    connection_name: str
    """Name used by the task to refer to this dataset type."""

    storage_class_name: str
    """Storage class expected by this task.

    If `ReadEdge.component` is not `None`, this is the component storage
    class, not the parent storage class.
    """

    is_calibration: bool
    """Whether this dataset type can be included in
    `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    """

    raw_dimensions: frozenset[str]
    """Raw dimensions in the task declaration.

    This can only be used safely for partial comparisons: two edges with the
    same ``raw_dimensions`` (and the same parent dataset type name) always
    have the same resolved dimensions, but edges with different
    ``raw_dimensions`` may also have the same resolved dimensions.
    """

    @property
    def is_init(self) -> bool:
        """Whether this dataset is read or written when the task is
        constructed, not when it is run.
        """
        return self.task_key.node_type is NodeType.TASK_INIT

    @property
    def task_label(self) -> str:
        """Label of the task."""
        return str(self.task_key)

    @property
    def parent_dataset_type_name(self) -> str:
        """Name of the parent dataset type.

        All dataset type nodes in a pipeline graph are for parent dataset
        types; components are represented by additional `ReadEdge` state.
        """
        return str(self.dataset_type_key)

    @property
    @abstractmethod
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        """The directed pair of `NodeKey` instances this edge connects.

        This tuple is ordered in the same direction as the pipeline flow:
        `task_key` precedes `dataset_type_key` for writes, and the
        reverse is true for reads.
        """
        raise NotImplementedError()

    @property
    def key(self) -> tuple[NodeKey, NodeKey, str]:
        """Ordered tuple of node keys and connection name that uniquely
        identifies this edge in a pipeline graph.
        """
        return self.nodes + (self.connection_name,)
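    # A hedged sketch (hypothetical names; a `NodeType.TASK` enum member is
    # assumed) of how `nodes` and `key` compose for the two concrete edge
    # types:
    #
    #     dataset = NodeKey(NodeType.DATASET_TYPE, "raw")
    #     task = NodeKey(NodeType.TASK, "isr")
    #     read_edge.nodes == (dataset, task)    # reads point into the task
    #     write_edge.nodes == (task, dataset)   # writes point out of it
    #     read_edge.key == (dataset, task, "input")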

    def __repr__(self) -> str:
        return f"{self.nodes[0]} -> {self.nodes[1]} ({self.connection_name})"

    @property
    def dataset_type_name(self) -> str:
        """Dataset type name seen by the task.

        This defaults to the parent dataset type name, which is appropriate
        for all writes and most reads.
        """
        return self.parent_dataset_type_name

    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
        """Compare this edge to another one from a possibly-different
        configuration of the same task label.

        Parameters
        ----------
        other : `Edge`
            Another edge of the same type to compare to.
        connection_type : `str`, optional
            Human-readable name of the connection type of this edge (e.g.
            "init input", "output") for use in returned messages.

        Returns
        -------
        differences : `list` [ `str` ]
            List of string messages describing differences between ``self``
            and ``other``. Will be empty if ``self == other`` or if the only
            difference is in the task label or connection name (which are not
            checked). Messages will use 'A' to refer to ``self`` and 'B' to
            refer to ``other``.
        """
        result = []
        if self.dataset_type_name != other.dataset_type_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has dataset type "
                f"{self.dataset_type_name!r} in A, but {other.dataset_type_name!r} in B."
            )
        if self.storage_class_name != other.storage_class_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has storage class "
                f"{self.storage_class_name!r} in A, but {other.storage_class_name!r} in B."
            )
        if self.raw_dimensions != other.raw_dimensions:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has raw dimensions "
                f"{set(self.raw_dimensions)} in A, but {set(other.raw_dimensions)} in B "
                "(differences in raw dimensions may not lead to differences in resolved dimensions, "
                "but this cannot be checked without re-resolving the dataset type)."
            )
        if self.is_calibration != other.is_calibration:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a calibration "
                f"{'in A but not in B' if self.is_calibration else 'in B but not in A'}."
            )
        return result
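    # A hedged usage sketch for `diff`: given two edges `a` and `b` built
    # from different configurations of the same task label,
    #
    #     for message in a.diff(b, "input"):
    #         print(message)
    #
    # prints one line per discrepancy; an empty result means the connections
    # agree (task label and connection name aside).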

    @abstractmethod
    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        """Transform the graph's definition of a dataset type (parent, with
        the registry or producer's storage class) to the one seen by this
        task.
        """
        raise NotImplementedError()

    @abstractmethod
    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        """Transform the graph's definition of a dataset reference (parent
        dataset type, with the registry or producer's storage class) to the
        one seen by this task.
        """
        raise NotImplementedError()

    def _to_xgraph_state(self) -> dict[str, Any]:
        """Convert this edge's attributes into a dictionary suitable for use
        in exported networkx graphs.
        """
        return {
            "parent_dataset_type_name": self.parent_dataset_type_name,
            "storage_class_name": self.storage_class_name,
            "is_init": self.is_init,
        }
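# A minimal sketch (not part of this module's API) of consuming the edge
# attributes exported above. It assumes `xgraph` comes from
# `PipelineGraph.make_xgraph` and is a networkx graph supporting
# `edges(data=True)`.
def _example_xgraph_edge_state(xgraph: Any) -> None:
    for _, _, state in xgraph.edges(data=True):
        print(state["parent_dataset_type_name"], state["storage_class_name"], state["is_init"])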

class ReadEdge(Edge):
    """Representation of an input connection (including init-inputs and
    prerequisites) in a pipeline graph.

    Parameters
    ----------
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to. This should
        hold the parent dataset type name for component dataset types.
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    is_prerequisite : `bool`
        Whether this dataset must be present in the data repository prior to
        `QuantumGraph` generation.
    component : `str` or `None`
        Component of the dataset type requested by the task.
    defer_query_constraint : `bool`
        If `True`, by default do not include this dataset type's existence as
        a constraint on the initial data ID query in QuantumGraph generation.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), read edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``
    - ``component``
    - ``is_prerequisite``

    As with `ReadEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type; if `PipelineGraph.resolve` has not been called,
    there may not even be a consistent graph-wide definition of the dataset
    type.
    """

    def __init__(
        self,
        dataset_type_key: NodeKey,
        task_key: NodeKey,
        *,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
        is_prerequisite: bool,
        component: str | None,
        defer_query_constraint: bool,
    ):
        super().__init__(
            task_key=task_key,
            dataset_type_key=dataset_type_key,
            storage_class_name=storage_class_name,
            connection_name=connection_name,
            raw_dimensions=raw_dimensions,
            is_calibration=is_calibration,
        )
        self.is_prerequisite = is_prerequisite
        self.component = component
        self.defer_query_constraint = defer_query_constraint

    component: str | None
    """Component to add to `parent_dataset_type_name` to form the dataset
    type name seen by this task.
    """

    is_prerequisite: bool
    """Whether this dataset must be present in the data repository prior to
    `QuantumGraph` generation.
    """

    defer_query_constraint: bool
    """If `True`, by default do not include this dataset type's existence as
    a constraint on the initial data ID query in QuantumGraph generation.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.dataset_type_key, self.task_key)

    @property
    def dataset_type_name(self) -> str:
        """Complete dataset type name, as seen by the task."""
        if self.component is not None:
            return f"{self.parent_dataset_type_name}.{self.component}"
        return self.parent_dataset_type_name
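    # Sketch: a connection declared with a hypothetical name "calexp.psf"
    # is stored as parent_dataset_type_name="calexp" with component="psf",
    # so this property recomposes the task-facing name:
    #
    #     edge.dataset_type_name == "calexp.psf"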

    def diff(self: ReadEdge, other: ReadEdge, connection_type: str = "connection") -> list[str]:
        # Docstring inherited.
        result = super().diff(other, connection_type)
        if self.defer_query_constraint != other.defer_query_constraint:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a deferred query "
                f"constraint {'in A but not in B' if self.defer_query_constraint else 'in B but not in A'}."
            )
        return result

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == dataset_type.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return dataset_type.makeComponentDatasetType(self.component)
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == ref.datasetType.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return ref.makeComponentRef(self.component)
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref
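    # Hedged sketch of the adapters above: both translate the graph-wide
    # (parent dataset type, registry storage class) view into the task's
    # view, so for any resolved parent definition
    #
    #     edge.adapt_dataset_type(parent_type).name == edge.dataset_type_name
    #
    # with component reads yielding the component dataset type and plain
    # storage-class differences yielding an overridden definition.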

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
        is_prerequisite: bool = False,
    ) -> ReadEdge:
        """Construct a `ReadEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration objects to draw dataset type
            information from, keyed by connection name.
        is_prerequisite : `bool`, optional
            Whether this dataset must be present in the data repository prior
            to `QuantumGraph` generation.

        Returns
        -------
        edge : `ReadEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        return cls(
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            task_key=task_key,
            component=component,
            storage_class_name=connection.storageClass,
            # InitInput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            is_prerequisite=is_prerequisite,
            connection_name=connection_name,
            # InitInput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
            # PrerequisiteInput and InitInput connections don't have a
            # .deferQueryConstraint, because they never constrain the initial
            # data ID query.
            defer_query_constraint=getattr(connection, "deferQueryConstraint", False),
        )
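    # Sketch of the split used above (behavior as documented for
    # `DatasetType.splitDatasetTypeName` in daf_butler; names hypothetical):
    #
    #     DatasetType.splitDatasetTypeName("calexp.psf") == ("calexp", "psf")
    #     DatasetType.splitDatasetTypeName("calexp") == ("calexp", None)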

    def _resolve_dataset_type(
        self,
        *,
        current: DatasetType | None,
        is_initial_query_constraint: bool,
        is_prerequisite: bool | None,
        universe: DimensionUniverse,
        producer: str | None,
        consumers: Sequence[str],
        is_registered: bool,
    ) -> tuple[DatasetType, bool, bool]:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists. If not, it will be the dataset type definition from the
            task in the graph that writes it, if there is one. If there is no
            such task, this will be `None`.
        is_initial_query_constraint : `bool`
            Whether this dataset type is currently marked as a constraint on
            the initial data ID query in QuantumGraph generation.
        is_prerequisite : `bool` or `None`
            Whether this dataset type is marked as a prerequisite input in all
            edges processed so far. `None` if this is the first edge.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.
        producer : `str` or `None`
            The label of the task that produces this dataset type in the
            pipeline, or `None` if it is an overall input.
        consumers : `Sequence` [ `str` ]
            Labels for other consuming tasks that have already participated in
            this dataset type's resolution.
        is_registered : `bool`
            Whether a registration for this dataset type was found in the
            data repository.

        Returns
        -------
        dataset_type : `DatasetType`
            The updated graph-wide dataset type. If ``current`` was provided,
            this must be equal to it.
        is_initial_query_constraint : `bool`
            If `True`, this dataset type should be included as a constraint in
            the initial data ID query during QuantumGraph generation; this
            requires that ``is_initial_query_constraint`` also be `True` on
            input.
        is_prerequisite : `bool`
            Whether this dataset type is marked as a prerequisite input in this
            task and all other edges processed so far.

        Raises
        ------
        MissingDatasetTypeError
            Raised if ``current is None`` and this edge cannot define one on
            its own.
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is not
            compatible with it.
        ConnectionTypeConsistencyError
            Raised if a prerequisite input for one task appears as a different
            kind of connection in any other task.
        """
        if "skypix" in self.raw_dimensions:
            if current is None:
                raise MissingDatasetTypeError(
                    f"DatasetType '{self.dataset_type_name}' referenced by "
                    f"{self.task_label!r} uses 'skypix' as a dimension "
                    f"placeholder, but has not been registered with the data repository. "
                    f"Note that reference catalog names are now used as the dataset "
                    f"type name instead of 'ref_cat'."
                )
            rest1 = set(universe.extract(self.raw_dimensions - {"skypix"}).names)
            rest2 = {dim.name for dim in current.dimensions if not isinstance(dim, SkyPixDimension)}
            if rest1 != rest2:
                raise IncompatibleDatasetTypeError(
                    f"Non-skypix dimensions for dataset type {self.dataset_type_name} declared in "
                    f"connections ({rest1}) are inconsistent with those in "
                    f"registry's version of this dataset ({rest2})."
                )
            dimensions = current.dimensions
        else:
            dimensions = universe.extract(self.raw_dimensions)
        is_initial_query_constraint = is_initial_query_constraint and not self.defer_query_constraint
        if is_prerequisite is None:
            is_prerequisite = self.is_prerequisite
        elif is_prerequisite and not self.is_prerequisite:
            raise ConnectionTypeConsistencyError(
                f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to {consumers}, "
                f"but it is not a prerequisite to {self.task_label!r}."
            )
        elif not is_prerequisite and self.is_prerequisite:
            if producer is not None:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label!r}, but it is produced by {producer!r}."
                )
            else:
                raise ConnectionTypeConsistencyError(
                    f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
                    f"{self.task_label!r}, but it is a regular input to {consumers!r}."
                )

        def report_current_origin() -> str:
            if is_registered:
                return "data repository"
            elif producer is not None:
                return f"producing task {producer!r}"
            else:
                return f"consuming task(s) {consumers!r}"

        if self.component is not None:
            if current is None:
                raise MissingDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} is not registered and not produced by "
                    f"this pipeline, but it is used by task {self.task_label!r}, via component "
                    f"{self.component!r}. This pipeline cannot be resolved until the parent dataset type is "
                    "registered."
                )
            all_current_components = current.storageClass.allComponents()
            if self.component not in all_current_components:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type {self.parent_dataset_type_name!r} has storage class "
                    f"{current.storageClass_name!r} (from {report_current_origin()}), "
                    f"which does not include component {self.component!r} "
                    f"as requested by task {self.task_label!r}."
                )
            if all_current_components[self.component].name != self.storage_class_name:
                raise IncompatibleDatasetTypeError(
                    f"Dataset type '{self.parent_dataset_type_name}.{self.component}' has storage class "
                    f"{all_current_components[self.component].name!r} "
                    f"(from {report_current_origin()}), which does not match "
                    f"{self.storage_class_name!r}, as requested by task {self.task_label!r}. "
                    "Note that storage class conversions of components are not supported."
                )
            return current, is_initial_query_constraint, is_prerequisite
        else:
            dataset_type = DatasetType(
                self.parent_dataset_type_name,
                dimensions,
                storageClass=self.storage_class_name,
                isCalibration=self.is_calibration,
            )
            if current is not None:
                if not is_registered and producer is None:
                    # Current definition comes from another consumer; we
                    # require the dataset types to be exactly equal (not just
                    # compatible), since neither connection should take
                    # precedence.
                    if dataset_type != current:
                        raise MissingDatasetTypeError(
                            f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
                            f"task {self.task_label!r} has {dataset_type}, but the definition "
                            f"from {report_current_origin()} is {current}. If the storage classes are "
                            "compatible but different, registering the dataset type in the data repository "
                            "in advance will avoid this error."
                        )
                elif not dataset_type.is_compatible_with(current):
                    raise IncompatibleDatasetTypeError(
                        f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
                        f"task {self.task_label!r} has {dataset_type}, but the definition "
                        f"from {report_current_origin()} is {current}."
                    )
                return current, is_initial_query_constraint, is_prerequisite
            else:
                return dataset_type, is_initial_query_constraint, is_prerequisite
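    # Hedged distillation of the precedence implemented above (a reading
    # aid, not an API): a data repository registration always wins, then a
    # producing task's definition, and otherwise all consumers must agree
    # exactly, since no consumer edge takes precedence over another.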

    def _to_xgraph_state(self) -> dict[str, Any]:
        # Docstring inherited.
        result = super()._to_xgraph_state()
        result["component"] = self.component
        result["is_prerequisite"] = self.is_prerequisite
        return result

class WriteEdge(Edge):
    """Representation of an output connection (including init-outputs) in a
    pipeline graph.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), write edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``

    As with `WriteEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type; if `PipelineGraph.resolve` has not been called,
    there may not even be a consistent graph-wide definition of the dataset
    type.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.task_key, self.dataset_type_key)

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
    ) -> WriteEdge:
        """Construct a `WriteEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : `Mapping` [ `str`, `.BaseConnection` ]
            Mapping of post-configuration objects to draw dataset type
            information from, keyed by connection name.

        Returns
        -------
        edge : `WriteEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        if component is not None:
            raise ValueError(
                f"Illegal output component dataset {connection.name!r} in task {task_key.name!r}."
            )
        return cls(
            task_key=task_key,
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            storage_class_name=connection.storageClass,
            connection_name=connection_name,
            # InitOutput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            # InitOutput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
        )
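    # Sketch: unlike reads, a component dataset type name in an output
    # connection (e.g. a hypothetical "calexp.psf") is rejected above with
    # `ValueError`; tasks may only write parent dataset types.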

    def _resolve_dataset_type(self, current: DatasetType | None, universe: DimensionUniverse) -> DatasetType:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`. This will always
            be the registry's definition of the parent dataset type, if one
            exists.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.

        Returns
        -------
        dataset_type : `DatasetType`
            A dataset type compatible with this edge. If ``current`` was
            provided, this must be equal to it.

        Raises
        ------
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is not
            compatible with it.
        """
        dimensions = universe.extract(self.raw_dimensions)
        dataset_type = DatasetType(
            self.parent_dataset_type_name,
            dimensions,
            storageClass=self.storage_class_name,
            isCalibration=self.is_calibration,
        )
        if current is not None:
            if not current.is_compatible_with(dataset_type):
                raise IncompatibleDatasetTypeError(
                    f"Incompatible definition for output dataset type {self.parent_dataset_type_name!r}: "
                    f"task {self.task_label!r} has {dataset_type}, but the data repository has {current}."
                )
            return current
        else:
            return dataset_type
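# A hedged sketch (hypothetical names; a `NodeType.TASK` enum member is
# assumed) of building a write edge by hand and resolving it against an
# empty data repository, in which case the task's own definition is
# returned. Real edges are normally built by PipelineGraph, not by hand.
def _example_write_edge(universe: DimensionUniverse) -> DatasetType:
    edge = WriteEdge(
        task_key=NodeKey(NodeType.TASK, "isr"),
        dataset_type_key=NodeKey(NodeType.DATASET_TYPE, "postISRCCD"),
        storage_class_name="Exposure",
        connection_name="outputExposure",
        is_calibration=False,
        raw_dimensions=frozenset({"instrument", "exposure", "detector"}),
    )
    # Writes are directed from the task to the dataset type.
    assert edge.nodes == (edge.task_key, edge.dataset_type_key)
    return edge._resolve_dataset_type(None, universe)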