Coverage for python/lsst/daf/relation/_operations/_calculation.py: 52%

52 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-10 02:26 -0800

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Calculation",) 

25 

26import dataclasses 

27from collections.abc import Set 

28from typing import TYPE_CHECKING, Literal, final 

29 

30from .._columns import ColumnExpression, ColumnTag 

31from .._exceptions import ColumnError 

32from .._operation_relations import UnaryOperationRelation 

33from .._unary_operation import UnaryCommutator, UnaryOperation 

34 

35if TYPE_CHECKING: 35 ↛ 36: line 35 didn't jump to line 36, because the condition on line 35 was never true

36 from .._engine import Engine 

37 from .._relation import Relation 

38 

39 

@final
@dataclasses.dataclass(frozen=True)
class Calculation(UnaryOperation):
    """A unary relation operation that appends one new column whose values
    are computed from an expression over columns already in the relation.

    Notes
    -----
    A `Calculation` is assumed to be a deterministic function of the existing
    columns.  In particular, applying a `Deduplication` before a `Calculation`
    is assumed to give the same result as applying it afterwards.  As a
    consequence, a `Calculation` should not be used to produce random numbers
    or counters, although its expression is free to draw on information from
    outside the relation.  The expression must nevertheless depend on at
    least one existing column, so a `Calculation` cannot be used to attach a
    constant-valued column to a relation.
    """

    tag: ColumnTag
    """Tag that identifies the newly added column (`ColumnTag`).
    """

    expression: ColumnExpression
    """Expression evaluated to fill in the new column (`ColumnExpression`).
    """

    def __post_init__(self) -> None:
        if self.expression.columns_required:
            return
        # Constant-valued calculated columns are rejected outright: they are
        # unlikely to be wanted, and forbidding them means we never have to
        # worry about a one-row, zero-column relation hiding behind one and
        # keeping Relation.is_trivial from propagating the way we'd like.
        raise ColumnError(
            f"Calculated column {self.tag} that does not depend on any other columns is not allowed."
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # The operation needs exactly the columns its expression needs.
        return self.expression.columns_required

    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    def __str__(self) -> str:
        return f"+[{self.tag!s}={self.expression!s}]"

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        # Support is delegated entirely to the backing expression.
        return self.expression.is_supported_by(engine)

    def _begin_apply(
        self, target: Relation, preferred_engine: Engine | None
    ) -> tuple[UnaryOperation, Engine]:
        # Docstring inherited.
        needed = self.expression.columns_required
        # Every column the expression reads must already exist in the target.
        if not (needed <= target.columns):
            raise ColumnError(
                f"Cannot calculate column {self.tag} because expression requires "
                f"columns {set(needed) - target.columns} "
                f"that are not present in the target relation {target}."
            )
        # The new column must not collide with one the target already has.
        if self.tag in target.columns:
            raise ColumnError(f"Calculated column {self.tag} is already present in {target}.")
        return super()._begin_apply(target, preferred_engine)

    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        # Target columns plus the one column this operation adds.
        return {*target.columns, self.tag}

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        from ._projection import Projection

        needed = self.columns_required
        available = current.target.columns
        if not (needed <= available):
            # The relation underneath ``current`` lacks inputs for the
            # expression, so report which columns are missing and do not
            # commute.
            message = f"{current.target} is missing columns {set(needed - available)}"
            return UnaryCommutator(
                first=None,
                second=current.operation,
                done=False,
                messages=(message,),
            )
        # When this calculation is moved ahead of a projection, that
        # projection has to be widened so it keeps the calculated column;
        # any other kind of operation is passed through untouched.
        second = current.operation
        if isinstance(second, Projection):
            second = Projection(second.columns | {self.tag})
        return UnaryCommutator(self, second)