Coverage for tests/test_deduplication.py: 18%

55 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-10 02:26 -0800

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import unittest 

25 

26from lsst.daf.relation import ( 

27 ColumnExpression, 

28 Deduplication, 

29 EngineError, 

30 SortTerm, 

31 UnaryOperationRelation, 

32 iteration, 

33 tests, 

34) 

35 

36 

class DeduplicationTestCase(tests.RelationTestCase):
    """Tests for the Deduplication operation and relations based on it."""

    def setUp(self) -> None:
        """Build the fixtures shared by every test.

        Creates two column tags (``a``, and ``b`` with ``is_key=False``), an
        iteration engine named "preferred", and a leaf relation in that
        engine whose only column ``a`` holds the rows 1, 0, 1 (so it contains
        one duplicate).
        """
        self.a = tests.ColumnTag("a")
        self.b = tests.ColumnTag("b", is_key=False)
        self.engine = iteration.Engine(name="preferred")
        self.leaf = self.engine.make_leaf(
            {self.a}, payload=iteration.RowSequence([{self.a: 1}, {self.a: 0}, {self.a: 1}]), name="leaf"
        )

    def test_attributes(self) -> None:
        """Check that all UnaryOperation and Relation attributes have the
        expected values.
        """
        relation = self.leaf.without_duplicates()
        # The isinstance asserts both check the result and narrow the static
        # type for the attribute accesses that follow.
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, {self.a})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 1)
        self.assertEqual(relation.max_rows, self.leaf.max_rows)
        operation = relation.operation
        assert isinstance(operation, Deduplication)
        self.assertEqual(operation.columns_required, set())
        self.assertTrue(operation.is_empty_invariant)
        self.assertFalse(operation.is_count_invariant)
        self.assertFalse(operation.is_order_dependent)
        self.assertFalse(operation.is_count_dependent)

    def test_backtracking_apply(self) -> None:
        """Test apply logic that involves reordering operations in the existing
        tree to perform the new operation in a preferred engine.
        """
        new_engine = iteration.Engine(name="downstream")
        expression = ColumnExpression.reference(self.a)
        predicate = expression.lt(ColumnExpression.literal(20))
        # Apply a bunch of operations in a new engine that a Deduplication
        # should commute with.
        target = (
            self.leaf.transferred_to(new_engine)
            .with_calculated_column(self.b, expression)
            .with_rows_satisfying(predicate)
            .sorted([SortTerm(ColumnExpression.reference(self.a))])
        )
        # Apply a new Deduplication with backtracking and see that it appears
        # before the transfer to the new engine, with the calculated column,
        # row filter, and sort reapplied unchanged downstream of it.
        relation = target.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        self.assert_relations_equal(
            relation,
            (
                self.leaf.without_duplicates()
                .transferred_to(new_engine)
                .with_calculated_column(self.b, expression)
                .with_rows_satisfying(predicate)
                .sorted([SortTerm(ColumnExpression.reference(self.a))])
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test apply logic that handles preferred engines without reordering
        operations in the existing tree.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a
        # Deduplication, because there is a locked Materialization in the way.
        locked = self.leaf.transferred_to(new_engine).materialized("lock")
        # Preferred engine is ignored if we can't backtrack and don't enable
        # anything else.
        self.assert_relations_equal(
            locked.without_duplicates(preferred_engine=self.engine),
            locked.without_duplicates(),
        )
        # We can force this to be an error.
        with self.assertRaises(EngineError):
            locked.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # We can also automatically transfer (back) to the preferred engine.
        self.assert_relations_equal(
            locked.without_duplicates(preferred_engine=self.engine, transfer=True),
            locked.transferred_to(self.engine).without_duplicates(),
        )
        # Now try a few other ways of making backtrack fail.  In each case the
        # relation is built *outside* the assertRaises block, so that only the
        # without_duplicates call itself can satisfy the assertion; an
        # EngineError raised while constructing the fixture would otherwise
        # make the test pass for the wrong reason.
        #
        # Deduplication does not commute with Projection.
        projected = (
            self.engine.make_leaf(
                {self.a, self.b},
                payload=iteration.RowSequence([{self.a: 0, self.b: 0}, {self.a: 0, self.b: 1}]),
                name="leaf",
            )
            .transferred_to(new_engine)
            .with_only_columns({self.a})
        )
        with self.assertRaises(EngineError):
            projected.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication does not commute with Slice.
        sliced = self.leaf.transferred_to(new_engine)[:1]
        with self.assertRaises(EngineError):
            sliced.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication cannot be inserted past Chains or Joins
        # (at least not without more information than we have, like whether
        # Chain branches are disjoint or leaf relations start out with unique
        # rows).
        chained = self.leaf.transferred_to(new_engine).chain(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="chain_leaf",
            )
        )
        with self.assertRaises(EngineError):
            chained.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        joined = self.leaf.transferred_to(new_engine).join(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="join_leaf",
            )
        )
        with self.assertRaises(EngineError):
            joined.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)

    def test_iteration(self) -> None:
        """Test Deduplication execution in the iteration engine."""
        relation = self.leaf.without_duplicates()
        # The leaf rows are 1, 0, 1; the expected result keeps one row per
        # distinct value, in first-seen order.
        self.assertEqual(
            list(self.engine.execute(relation)),
            [{self.a: 1}, {self.a: 0}],
        )

    def test_str(self) -> None:
        """Test str(Deduplication) and
        str(UnaryOperationRelation[Deduplication]).
        """
        relation = self.leaf.without_duplicates()
        self.assertEqual(str(relation), "deduplicate(leaf)")

170 

171 

# Run the test suite when this module is executed directly as a script.
# (Coverage report note: this condition was never true during the measured
# run, so the call below was not executed.)
if __name__ == "__main__":
    unittest.main()