Coverage for python/lsst/pipe/base/pipeline_graph/_edges.py: 37%
191 statements
coverage.py v7.3.0, created at 2023-08-23 10:31 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Edge", "ReadEdge", "WriteEdge")

from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import Any, ClassVar, TypeVar

from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, SkyPixDimension
from lsst.daf.butler.registry import MissingDatasetTypeError
from lsst.utils.classes import immutable

from ..connectionTypes import BaseConnection
from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
from ._nodes import NodeKey, NodeType

_S = TypeVar("_S", bound="Edge")

@immutable
class Edge(ABC):
    """Base class for edges in a pipeline graph.

    This represents the link between a task node and an input or output
    dataset type.

    Parameters
    ----------
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    """

    def __init__(
        self,
        *,
        task_key: NodeKey,
        dataset_type_key: NodeKey,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
    ):
        self.task_key = task_key
        self.dataset_type_key = dataset_type_key
        self.connection_name = connection_name
        self.storage_class_name = storage_class_name
        self.is_calibration = is_calibration
        self.raw_dimensions = raw_dimensions

    INIT_TO_TASK_NAME: ClassVar[str] = "INIT"
    """Edge key for the special edge that connects a task init node to the
    task node itself (for regular edges, this would be the connection name).
    """

    task_key: NodeKey
    """Task part of the key for this edge in networkx graphs."""

    dataset_type_key: NodeKey
    """Dataset type part of the key for this edge in networkx graphs."""

    connection_name: str
    """Name used by the task to refer to this dataset type."""

    storage_class_name: str
    """Storage class expected by this task.

    If `ReadEdge.component` is not `None`, this is the component storage
    class, not the parent storage class.
    """

    is_calibration: bool
    """Whether this dataset type can be included in
    `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    """

    raw_dimensions: frozenset[str]
    """Raw dimensions in the task declaration.

    This can only be used safely for partial comparisons: two edges with the
    same ``raw_dimensions`` (and the same parent dataset type name) always
    have the same resolved dimensions, but edges with different
    ``raw_dimensions`` may also have the same resolved dimensions.
    """

    @property
    def is_init(self) -> bool:
        """Whether this dataset is read or written when the task is
        constructed, not when it is run.
        """
        return self.task_key.node_type is NodeType.TASK_INIT

    @property
    def task_label(self) -> str:
        """Label of the task."""
        return str(self.task_key)

    @property
    def parent_dataset_type_name(self) -> str:
        """Name of the parent dataset type.

        All dataset type nodes in a pipeline graph are for parent dataset
        types; components are represented by additional `ReadEdge` state.
        """
        return str(self.dataset_type_key)

    @property
    @abstractmethod
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        """The directed pair of `NodeKey` instances this edge connects.

        This tuple is ordered in the same direction as the pipeline flow:
        `task_key` precedes `dataset_type_key` for writes, and the reverse is
        true for reads.
        """
        raise NotImplementedError()

    @property
    def key(self) -> tuple[NodeKey, NodeKey, str]:
        """Ordered tuple of node keys and connection name that uniquely
        identifies this edge in a pipeline graph.
        """
        return self.nodes + (self.connection_name,)

    def __repr__(self) -> str:
        return f"{self.nodes[0]} -> {self.nodes[1]} ({self.connection_name})"

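    # For example (hypothetical task label and connection names): a write
    # edge from task "isr" to dataset type "postISRCCD" via connection
    # "outputExposure" would render as "isr -> postISRCCD (outputExposure)",
    # and its ``key`` would be those two node keys plus "outputExposure".
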
    @property
    def dataset_type_name(self) -> str:
        """Dataset type name seen by the task.

        This defaults to the parent dataset type name, which is appropriate
        for all writes and most reads.
        """
        return self.parent_dataset_type_name

    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
        """Compare this edge to another one from a possibly-different
        configuration of the same task label.

        Parameters
        ----------
        other : `Edge`
            Another edge of the same type to compare to.
        connection_type : `str`
            Human-readable name of the connection type of this edge (e.g.
            "init input", "output") for use in returned messages.

        Returns
        -------
        differences : `list` [ `str` ]
            List of string messages describing differences between ``self``
            and ``other``.  Will be empty if ``self == other`` or if the only
            difference is in the task label or connection name (which are not
            checked).  Messages will use 'A' to refer to ``self`` and 'B' to
            refer to ``other``.
        """
        result = []
        if self.dataset_type_name != other.dataset_type_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has dataset type "
                f"{self.dataset_type_name!r} in A, but {other.dataset_type_name!r} in B."
            )
        if self.storage_class_name != other.storage_class_name:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has storage class "
                f"{self.storage_class_name!r} in A, but {other.storage_class_name!r} in B."
            )
        if self.raw_dimensions != other.raw_dimensions:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} has raw dimensions "
                f"{set(self.raw_dimensions)} in A, but {set(other.raw_dimensions)} in B "
                "(differences in raw dimensions may not lead to differences in resolved dimensions, "
                "but this cannot be checked without re-resolving the dataset type)."
            )
        if self.is_calibration != other.is_calibration:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a calibration "
                f"{'in A but not in B' if self.is_calibration else 'in B but not in A'}."
            )
        return result

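    # A minimal usage sketch (``edge_a`` and ``edge_b`` are assumed to be
    # edges for the same task label from two pipeline configurations):
    #
    #     for message in edge_a.diff(edge_b, "input"):
    #         print(message)
    #
    # An empty list means the two connections are equivalent for graph
    # purposes, even if their task labels or connection names differ.
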
    @abstractmethod
    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        """Transform the graph's definition of a dataset type (parent, with
        the registry or producer's storage class) to the one seen by this
        task.
        """
        raise NotImplementedError()

    @abstractmethod
    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        """Transform the graph's definition of a dataset reference (parent
        dataset type, with the registry or producer's storage class) to the
        one seen by this task.
        """
        raise NotImplementedError()

    def _to_xgraph_state(self) -> dict[str, Any]:
        """Convert this edge's attributes into a dictionary suitable for use
        in exported networkx graphs.
        """
        return {
            "parent_dataset_type_name": self.parent_dataset_type_name,
            "storage_class_name": self.storage_class_name,
            "is_init": self.is_init,
        }

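    # A sketch of how these attributes appear downstream (assuming ``graph``
    # is a resolved PipelineGraph):
    #
    #     xgraph = graph.make_xgraph()
    #     for _, _, state in xgraph.edges(data=True):
    #         print(state["parent_dataset_type_name"], state["is_init"])

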
class ReadEdge(Edge):
    """Representation of an input connection (including init-inputs and
    prerequisites) in a pipeline graph.

    Parameters
    ----------
    dataset_type_key : `NodeKey`
        Key for the dataset type node this edge is connected to.  This should
        hold the parent dataset type name for component dataset types.
    task_key : `NodeKey`
        Key for the task node this edge is connected to.
    storage_class_name : `str`
        Name of the dataset type's storage class as seen by the task.
    connection_name : `str`
        Internal name for the connection as seen by the task.
    is_calibration : `bool`
        Whether this dataset type can be included in
        `~lsst.daf.butler.CollectionType.CALIBRATION` collections.
    raw_dimensions : `frozenset` [ `str` ]
        Raw dimensions from the connection definition.
    is_prerequisite : `bool`
        Whether this dataset must be present in the data repository prior to
        `QuantumGraph` generation.
    component : `str` or `None`
        Component of the dataset type requested by the task.
    defer_query_constraint : `bool`
        If `True`, by default do not include this dataset type's existence as
        a constraint on the initial data ID query in QuantumGraph generation.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), read edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``
    - ``component``
    - ``is_prerequisite``

    As with `ReadEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type or (if `PipelineGraph.resolve` has not been called)
    there may not even be a consistent definition of the dataset type.
    """

    def __init__(
        self,
        dataset_type_key: NodeKey,
        task_key: NodeKey,
        *,
        storage_class_name: str,
        connection_name: str,
        is_calibration: bool,
        raw_dimensions: frozenset[str],
        is_prerequisite: bool,
        component: str | None,
        defer_query_constraint: bool,
    ):
        super().__init__(
            task_key=task_key,
            dataset_type_key=dataset_type_key,
            storage_class_name=storage_class_name,
            connection_name=connection_name,
            raw_dimensions=raw_dimensions,
            is_calibration=is_calibration,
        )
        self.is_prerequisite = is_prerequisite
        self.component = component
        self.defer_query_constraint = defer_query_constraint

    component: str | None
    """Component to add to `parent_dataset_type_name` to form the dataset
    type name seen by this task.
    """

    is_prerequisite: bool
    """Whether this dataset must be present in the data repository prior to
    `QuantumGraph` generation.
    """

    defer_query_constraint: bool
    """If `True`, by default do not include this dataset type's existence as
    a constraint on the initial data ID query in QuantumGraph generation.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.dataset_type_key, self.task_key)

    @property
    def dataset_type_name(self) -> str:
        """Complete dataset type name, as seen by the task."""
        if self.component is not None:
            return f"{self.parent_dataset_type_name}.{self.component}"
        return self.parent_dataset_type_name

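    # For example (hypothetical names): with parent dataset type "calexp" and
    # component "wcs", ``dataset_type_name`` is "calexp.wcs", while
    # ``parent_dataset_type_name`` is still "calexp".
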
    def diff(self: ReadEdge, other: ReadEdge, connection_type: str = "connection") -> list[str]:
        # Docstring inherited.
        result = super().diff(other, connection_type)
        if self.defer_query_constraint != other.defer_query_constraint:
            result.append(
                f"{connection_type.capitalize()} {self.connection_name!r} is marked as a deferred query "
                f"constraint {'in A but not in B' if self.defer_query_constraint else 'in B but not in A'}."
            )
        return result

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == dataset_type.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return dataset_type.makeComponentDatasetType(self.component)
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.component is not None:
            assert (
                self.storage_class_name == ref.datasetType.storageClass.allComponents()[self.component].name
            ), "components with storage class overrides are not supported"
            return ref.makeComponentRef(self.component)
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

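    # A minimal sketch (assuming ``edge`` is a ReadEdge with component "wcs"
    # and ``parent_type`` is the graph's parent DatasetType; both names
    # hypothetical):
    #
    #     component_type = edge.adapt_dataset_type(parent_type)
    #     assert component_type.name == f"{parent_type.name}.wcs"
    #
    # For non-component reads, only the storage class may change.
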
    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
        is_prerequisite: bool = False,
    ) -> ReadEdge:
        """Construct a `ReadEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : Mapping [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.
        is_prerequisite : `bool`, optional
            Whether this dataset must be present in the data repository prior
            to `QuantumGraph` generation.

        Returns
        -------
        edge : `ReadEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        return cls(
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            task_key=task_key,
            component=component,
            storage_class_name=connection.storageClass,
            # InitInput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            is_prerequisite=is_prerequisite,
            connection_name=connection_name,
            # InitInput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
            # PrerequisiteInput and InitInput connections don't have a
            # .deferQueryConstraint, because they never constrain the initial
            # data ID query.
            defer_query_constraint=getattr(connection, "deferQueryConstraint", False),
        )

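    # ``DatasetType.splitDatasetTypeName`` performs the component split
    # above; e.g. (hypothetical names) "calexp.wcs" splits into
    # ("calexp", "wcs"), while "calexp" splits into ("calexp", None).
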
    def _resolve_dataset_type(
        self,
        *,
        current: DatasetType | None,
        is_initial_query_constraint: bool,
        is_prerequisite: bool | None,
        universe: DimensionUniverse,
        producer: str | None,
        consumers: Sequence[str],
        is_registered: bool,
    ) -> tuple[DatasetType, bool, bool]:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`.  This will
            always be the registry's definition of the parent dataset type,
            if one exists.  If not, it will be the dataset type definition
            from the task in the graph that writes it, if there is one.  If
            there is no such task, this will be `None`.
        is_initial_query_constraint : `bool`
            Whether this dataset type is currently marked as a constraint on
            the initial data ID query in QuantumGraph generation.
        is_prerequisite : `bool` or `None`
            Whether this dataset type is marked as a prerequisite input in
            all edges processed so far.  `None` if this is the first edge.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.
        producer : `str` or `None`
            The label of the task that produces this dataset type in the
            pipeline, or `None` if it is an overall input.
        consumers : `Sequence` [ `str` ]
            Labels for other consuming tasks that have already participated
            in this dataset type's resolution.
        is_registered : `bool`
            Whether a registration for this dataset type was found in the
            data repository.

        Returns
        -------
        dataset_type : `DatasetType`
            The updated graph-wide dataset type.  If ``current`` was
            provided, this must be equal to it.
        is_initial_query_constraint : `bool`
            If `True`, this dataset type should be included as a constraint
            in the initial data ID query during QuantumGraph generation; this
            requires that ``is_initial_query_constraint`` also be `True` on
            input.
        is_prerequisite : `bool`
            Whether this dataset type is marked as a prerequisite input in
            this task and all other edges processed so far.

        Raises
        ------
        MissingDatasetTypeError
            Raised if ``current is None`` and this edge cannot define one on
            its own.
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is
            not compatible with it.
        ConnectionTypeConsistencyError
            Raised if a prerequisite input for one task appears as a
            different kind of connection in any other task.
        """
483 if "skypix" in self.raw_dimensions:
484 if current is None:
485 raise MissingDatasetTypeError(
486 f"DatasetType '{self.dataset_type_name}' referenced by "
487 f"{self.task_label!r} uses 'skypix' as a dimension "
488 f"placeholder, but has not been registered with the data repository. "
489 f"Note that reference catalog names are now used as the dataset "
490 f"type name instead of 'ref_cat'."
491 )
492 rest1 = set(universe.extract(self.raw_dimensions - {"skypix"}).names)
493 rest2 = {dim.name for dim in current.dimensions if not isinstance(dim, SkyPixDimension)}
494 if rest1 != rest2:
495 raise IncompatibleDatasetTypeError(
496 f"Non-skypix dimensions for dataset type {self.dataset_type_name} declared in "
497 f"connections ({rest1}) are inconsistent with those in "
498 f"registry's version of this dataset ({rest2})."
499 )
500 dimensions = current.dimensions
501 else:
502 dimensions = universe.extract(self.raw_dimensions)
503 is_initial_query_constraint = is_initial_query_constraint and not self.defer_query_constraint
504 if is_prerequisite is None:
505 is_prerequisite = self.is_prerequisite
506 elif is_prerequisite and not self.is_prerequisite:
507 raise ConnectionTypeConsistencyError(
508 f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to {consumers}, "
509 f"but it is not a prerequisite to {self.task_label!r}."
510 )
511 elif not is_prerequisite and self.is_prerequisite:
512 if producer is not None:
513 raise ConnectionTypeConsistencyError(
514 f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
515 f"{self.task_label}, but it is produced by {producer!r}."
516 )
517 else:
518 raise ConnectionTypeConsistencyError(
519 f"Dataset type {self.parent_dataset_type_name!r} is a prerequisite input to "
520 f"{self.task_label}, but it is a regular input to {consumers!r}."
521 )
523 def report_current_origin() -> str:
524 if is_registered:
525 return "data repository"
526 elif producer is not None:
527 return f"producing task {producer!r}"
528 else:
529 return f"consuming task(s) {consumers!r}"
531 if self.component is not None:
532 if current is None:
533 raise MissingDatasetTypeError(
534 f"Dataset type {self.parent_dataset_type_name!r} is not registered and not produced by "
535 f"this pipeline, but it used by task {self.task_label!r}, via component "
536 f"{self.component!r}. This pipeline cannot be resolved until the parent dataset type is "
537 "registered."
538 )
539 all_current_components = current.storageClass.allComponents()
540 if self.component not in all_current_components:
541 raise IncompatibleDatasetTypeError(
542 f"Dataset type {self.parent_dataset_type_name!r} has storage class "
543 f"{current.storageClass_name!r} (from {report_current_origin()}), "
544 f"which does not include component {self.component!r} "
545 f"as requested by task {self.task_label!r}."
546 )
547 if all_current_components[self.component].name != self.storage_class_name:
548 raise IncompatibleDatasetTypeError(
549 f"Dataset type '{self.parent_dataset_type_name}.{self.component}' has storage class "
550 f"{all_current_components[self.component].name!r} "
551 f"(from {report_current_origin()}), which does not match "
552 f"{self.storage_class_name!r}, as requested by task {self.task_label!r}. "
553 "Note that storage class conversions of components are not supported."
554 )
555 return current, is_initial_query_constraint, is_prerequisite
556 else:
557 dataset_type = DatasetType(
558 self.parent_dataset_type_name,
559 dimensions,
560 storageClass=self.storage_class_name,
561 isCalibration=self.is_calibration,
562 )
563 if current is not None:
564 if not is_registered and producer is None:
565 # Current definition comes from another consumer; we
566 # require the dataset types to be exactly equal (not just
567 # compatible), since neither connection should take
568 # precedence.
569 if dataset_type != current:
570 raise MissingDatasetTypeError(
571 f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
572 f"task {self.task_label!r} has {dataset_type}, but the definition "
573 f"from {report_current_origin()} is {current}. If the storage classes are "
574 "compatible but different, registering the dataset type in the data repository "
575 "in advance will avoid this error."
576 )
577 elif not dataset_type.is_compatible_with(current):
578 raise IncompatibleDatasetTypeError(
579 f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
580 f"task {self.task_label!r} has {dataset_type}, but the definition "
581 f"from {report_current_origin()} is {current}."
582 )
583 return current, is_initial_query_constraint, is_prerequisite
584 else:
585 return dataset_type, is_initial_query_constraint, is_prerequisite
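    # Resolution precedence, in brief: a registered (data repository)
    # definition wins over a producing task's, which wins over consumers';
    # when only consumers define the type, their definitions must be exactly
    # equal, since no single consumer takes precedence.
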
    def _to_xgraph_state(self) -> dict[str, Any]:
        # Docstring inherited.
        result = super()._to_xgraph_state()
        result["component"] = self.component
        result["is_prerequisite"] = self.is_prerequisite
        return result


class WriteEdge(Edge):
    """Representation of an output connection (including init-outputs) in a
    pipeline graph.

    Notes
    -----
    When included in an exported `networkx` graph (e.g.
    `PipelineGraph.make_xgraph`), write edges set the following edge
    attributes:

    - ``parent_dataset_type_name``
    - ``storage_class_name``
    - ``is_init``

    As with `WriteEdge` instance attributes, these descriptions of dataset
    types are those specific to a task, and may differ from the graph's
    resolved dataset type or (if `PipelineGraph.resolve` has not been called)
    there may not even be a consistent definition of the dataset type.
    """

    @property
    def nodes(self) -> tuple[NodeKey, NodeKey]:
        # Docstring inherited.
        return (self.task_key, self.dataset_type_key)

    def adapt_dataset_type(self, dataset_type: DatasetType) -> DatasetType:
        # Docstring inherited.
        if self.storage_class_name != dataset_type.storageClass_name:
            return dataset_type.overrideStorageClass(self.storage_class_name)
        return dataset_type

    def adapt_dataset_ref(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if self.storage_class_name != ref.datasetType.storageClass_name:
            return ref.overrideStorageClass(self.storage_class_name)
        return ref

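    # A minimal sketch (assuming the task declares storage class
    # "ArrowAstropy" while the graph-wide dataset type uses "DataFrame"; both
    # names hypothetical): ``edge.adapt_dataset_type(graph_type)`` returns a
    # copy of ``graph_type`` with the task's storage class, and is a no-op
    # when the storage classes already match.
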
    @classmethod
    def _from_connection_map(
        cls,
        task_key: NodeKey,
        connection_name: str,
        connection_map: Mapping[str, BaseConnection],
    ) -> WriteEdge:
        """Construct a `WriteEdge` instance from a `.BaseConnection` object.

        Parameters
        ----------
        task_key : `NodeKey`
            Key for the associated task node or task init node.
        connection_name : `str`
            Internal name for the connection as seen by the task.
        connection_map : Mapping [ `str`, `.BaseConnection` ]
            Mapping of post-configuration connection objects to draw dataset
            type information from, keyed by connection name.

        Returns
        -------
        edge : `WriteEdge`
            New edge instance.
        """
        connection = connection_map[connection_name]
        parent_dataset_type_name, component = DatasetType.splitDatasetTypeName(connection.name)
        if component is not None:
            raise ValueError(
                f"Illegal output component dataset {connection.name!r} in task {task_key.name!r}."
            )
        return cls(
            task_key=task_key,
            dataset_type_key=NodeKey(NodeType.DATASET_TYPE, parent_dataset_type_name),
            storage_class_name=connection.storageClass,
            connection_name=connection_name,
            # InitOutput connections don't have .isCalibration.
            is_calibration=getattr(connection, "isCalibration", False),
            # InitOutput connections don't have a .dimensions because they
            # always have empty dimensions.
            raw_dimensions=frozenset(getattr(connection, "dimensions", frozenset())),
        )

    def _resolve_dataset_type(self, current: DatasetType | None, universe: DimensionUniverse) -> DatasetType:
        """Participate in the construction of the `DatasetTypeNode` object
        associated with this edge.

        Parameters
        ----------
        current : `lsst.daf.butler.DatasetType` or `None`
            The current graph-wide `DatasetType`, or `None`.  This will
            always be the registry's definition of the parent dataset type,
            if one exists.
        universe : `lsst.daf.butler.DimensionUniverse`
            Object that holds all dimension definitions.

        Returns
        -------
        dataset_type : `DatasetType`
            A dataset type compatible with this edge.  If ``current`` was
            provided, this must be equal to it.

        Raises
        ------
        IncompatibleDatasetTypeError
            Raised if ``current is not None`` and this edge's definition is
            not compatible with it.
        """
        dimensions = universe.extract(self.raw_dimensions)
        dataset_type = DatasetType(
            self.parent_dataset_type_name,
            dimensions,
            storageClass=self.storage_class_name,
            isCalibration=self.is_calibration,
        )
        if current is not None:
            if not current.is_compatible_with(dataset_type):
                raise IncompatibleDatasetTypeError(
                    f"Incompatible definition for output dataset type {self.parent_dataset_type_name!r}: "
                    f"task {self.task_label!r} has {dataset_type}, but data repository has {current}."
                )
            return current
        else:
            return dataset_type