Coverage for python/lsst/daf/relation/_operations/_calculation.py: 52%
52 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-10 02:26 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-10 02:26 -0800
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Calculation",)
26import dataclasses
27from collections.abc import Set
28from typing import TYPE_CHECKING, Literal, final
30from .._columns import ColumnExpression, ColumnTag
31from .._exceptions import ColumnError
32from .._operation_relations import UnaryOperationRelation
33from .._unary_operation import UnaryCommutator, UnaryOperation
35if TYPE_CHECKING:  # 35 ↛ 36: line 35 didn't jump to line 36, because the condition on line 35 was never true
36 from .._engine import Engine
37 from .._relation import Relation
@final
@dataclasses.dataclass(frozen=True)
class Calculation(UnaryOperation):
    """A unary relation operation that appends one new column whose values
    are computed from an expression over columns already in the relation.

    Notes
    -----
    The value of a `Calculation` column is assumed to be a deterministic
    function of existing columns.  In particular, applying a `Deduplication`
    before a `Calculation` is assumed to give the same result as applying it
    afterwards, so a `Calculation` should not be used to generate random
    numbers or counters.  Using additional information from outside the
    relation is not prohibited.  The backing expression must depend on at
    least one existing column, however, which also means a `Calculation`
    cannot be used to add a constant-valued column to a relation.
    """

    tag: ColumnTag
    """Identifier of the column this operation adds (`ColumnTag`).
    """

    expression: ColumnExpression
    """Expression evaluated to fill in the new column (`ColumnExpression`).
    """

    def __post_init__(self) -> None:
        if self.expression.columns_required:
            return
        # Constant-valued calculated columns are rejected outright: nobody is
        # likely to need them, and forbidding them means we never have to
        # worry about a one-row, zero-column relation hiding behind one (which
        # would keep Relation.is_trivial from propagating the way we'd like).
        raise ColumnError(
            f"Calculated column {self.tag} that does not depend on any other columns is not allowed."
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        return self.expression.columns_required

    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    def __str__(self) -> str:
        return f"+[{self.tag!s}={self.expression!s}]"

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        return self.expression.is_supported_by(engine)

    def _begin_apply(
        self, target: Relation, preferred_engine: Engine | None
    ) -> tuple[UnaryOperation, Engine]:
        # Docstring inherited.
        # Every input of the expression has to exist in the target already.
        inputs_present = self.expression.columns_required <= target.columns
        if not inputs_present:
            raise ColumnError(
                f"Cannot calculate column {self.tag} because expression requires "
                f"columns {set(self.expression.columns_required) - target.columns} "
                f"that are not present in the target relation {target}."
            )
        # The new column must not collide with one the target already has.
        if self.tag in target.columns:
            raise ColumnError(f"Calculated column {self.tag} is already present in {target}.")
        return super()._begin_apply(target, preferred_engine)

    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return {*target.columns, self.tag}

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        from ._projection import Projection

        can_move_upstream = self.columns_required <= current.target.columns
        if not can_move_upstream:
            # The expression needs columns that only exist downstream of the
            # current operation, so the two cannot be swapped.
            return UnaryCommutator(
                first=None,
                second=current.operation,
                done=False,
                messages=(
                    f"{current.target} is missing columns "
                    f"{set(self.columns_required - current.target.columns)}",
                ),
            )
        operation = current.operation
        if isinstance(operation, Projection):
            # When the calculation moves ahead of a projection, the projection
            # has to keep the calculated column as well.
            operation = Projection(operation.columns | {self.tag})
        return UnaryCommutator(self, operation)