Coverage for python/lsst/daf/relation/_operations/_projection.py: 42%

60 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-06 10:42 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Projection",) 

25 

26import dataclasses 

27from collections.abc import Set 

28from typing import TYPE_CHECKING, Literal, final 

29 

30from .._columns import ColumnTag 

31from .._exceptions import ColumnError 

32from .._operation_relations import UnaryOperationRelation 

33from .._unary_operation import Identity, UnaryCommutator, UnaryOperation 

34 

35if TYPE_CHECKING: 

36 from .._engine import Engine 

37 from .._relation import Relation 

38 

39 

40@final 

41@dataclasses.dataclass(frozen=True) 

42class Projection(UnaryOperation): 

43 """A unary operation that removes one or more columns. 

44 

45 Notes 

46 ----- 

47 This is the only operation permitted to introduce duplication among rows 

48 (as opposed to just propagating duplicates). 

49 """ 

50 

51 columns: frozenset[ColumnTag] 

52 """The columns to be kept (`frozenset` [ `ColumnTag` ]). 

53 """ 

54 

55 @property 

56 def columns_required(self) -> Set[ColumnTag]: 

57 # Docstring inherited. 

58 return self.columns 

59 

60 @property 

61 def is_empty_invariant(self) -> Literal[True]: 

62 # Docstring inherited. 

63 return True 

64 

65 @property 

66 def is_count_invariant(self) -> Literal[True]: 

67 # Docstring inherited. 

68 return True 

69 

70 def __str__(self) -> str: 

71 return f"Π[{', '.join(sorted(str(tag) for tag in self.columns))}]" 

72 

73 def _begin_apply( 

74 self, target: Relation, preferred_engine: Engine | None 

75 ) -> tuple[UnaryOperation, Engine]: 

76 if self.columns == target.columns: 

77 return Identity(), target.engine 

78 if not self.columns <= target.columns: 

79 raise ColumnError( 

80 f"Cannot project column(s) {set(self.columns) - target.columns} " 

81 f"that are not present in the target relation {target}." 

82 ) 

83 return super()._begin_apply(target, preferred_engine) 

84 

85 def _finish_apply(self, target: Relation) -> Relation: 

86 if self.columns == target.columns: 

87 return target 

88 return super()._finish_apply(target) 

89 

90 def applied_columns(self, target: Relation) -> Set[ColumnTag]: 

91 # Docstring inherited. 

92 return self.columns 

93 

94 def applied_min_rows(self, target: Relation) -> int: 

95 # Docstring inherited. 

96 return target.min_rows 

97 

98 def commute(self, current: UnaryOperationRelation) -> UnaryCommutator: 

99 # Docstring inherited. 

100 from ._calculation import Calculation 

101 

102 commuted_columns: frozenset[ColumnTag] = self.columns 

103 match current.operation: 

104 case Projection(): 

105 # We can just drop any existing Projection as this one 

106 # supersedes it; by construction the new one has a 

107 # subset of the original's columns. 

108 return UnaryCommutator(first=self, second=Identity()) 

109 case Calculation(tag=tag): 

110 if tag not in self.columns: 

111 # Projection will drop the column added by the 

112 # Calculation, so it might as well have never 

113 # existed. 

114 return UnaryCommutator(first=self, second=Identity()) 

115 else: 

116 commuted_columns -= {tag} 

117 if not commuted_columns >= current.operation.columns_required: 

118 # Can't move the entire projection past this operation; 

119 # move what we can, and return the full Projection as the 

120 # "remainder". 

121 return UnaryCommutator( 

122 first=Projection(commuted_columns | current.operation.columns_required), 

123 second=current.operation, 

124 done=False, 

125 messages=( 

126 f"{current.operation} requires columns " 

127 f"{set(current.operation.columns_required - self.columns)}", 

128 ), 

129 ) 

130 return UnaryCommutator(Projection(commuted_columns), current.operation) 

131 

132 def simplify(self, upstream: UnaryOperation) -> UnaryOperation | None: 

133 # Docstring inherited. 

134 from ._calculation import Calculation 

135 

136 # See similar checks in commute for explanations. 

137 match upstream: 

138 case Projection(): 

139 return self 

140 case Calculation(tag=tag) if tag not in self.columns: 

141 return self 

142 return None