Coverage for tests/test_join.py: 9%

102 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-16 09:55 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import unittest 

25 

26from lsst.daf.relation import ( 

27 BinaryOperationRelation, 

28 ColumnError, 

29 ColumnExpression, 

30 EngineError, 

31 Join, 

32 Predicate, 

33 SortTerm, 

34 iteration, 

35 tests, 

36) 

37 

38 

class JoinTestCase(tests.RelationTestCase):
    """Tests for the Join operation and relations based on it."""

    def setUp(self) -> None:
        """Create two leaf relations in one iteration engine.

        ``leaf_1`` has columns ``{a, b}`` and ``leaf_2`` has columns
        ``{a, c}``, so ``a`` is the only column they have in common.  Both
        leaves hold three rows.
        """
        self.a = tests.ColumnTag("a")
        self.b = tests.ColumnTag("b")
        self.c = tests.ColumnTag("c")
        self.engine = iteration.Engine(name="preferred")
        self.leaf_1 = self.engine.make_leaf(
            {self.a, self.b},
            payload=iteration.RowSequence(
                [{self.a: 0, self.b: 5}, {self.a: 1, self.b: 10}, {self.a: 2, self.b: 25}]
            ),
            name="leaf_1",
        )
        # Every row must only use the columns declared for the relation
        # ({a, c}); the last row previously used ``b`` by mistake.
        self.leaf_2 = self.engine.make_leaf(
            {self.a, self.c},
            payload=iteration.RowSequence(
                [{self.a: 0, self.c: 15}, {self.a: 2, self.c: 20}, {self.a: 3, self.c: 0}]
            ),
            name="leaf_2",
        )

    def test_attributes(self) -> None:
        """Check that all Relation and PartialJoin attributes have the expected
        values.
        """
        relation = self.leaf_1.join(self.leaf_2)
        assert isinstance(relation, BinaryOperationRelation)
        self.assertEqual(relation.columns, {self.a, self.b, self.c})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 0)
        # Upper bound is the product of the operand row counts (3 * 3).
        self.assertEqual(relation.max_rows, 9)
        self.assertFalse(relation.is_locked)
        operation = relation.operation
        assert isinstance(operation, Join)
        self.assertEqual(operation.min_columns, {self.a})
        self.assertEqual(operation.max_columns, {self.a})
        self.assertEqual(operation.common_columns, {self.a})
        self.assertEqual(operation.predicate, Predicate.literal(True))
        partial = Join().partial(self.leaf_1)
        self.assertEqual(partial.columns_required, frozenset())
        self.assert_relations_equal(partial.fixed, self.leaf_1)
        self.assertFalse(partial.is_count_dependent)
        self.assertFalse(partial.is_order_dependent)
        self.assertFalse(partial.is_count_invariant)
        self.assertFalse(partial.is_empty_invariant)
        self.assertEqual(partial.applied_columns(self.leaf_2), {self.a, self.b, self.c})
        self.assertEqual(partial.applied_min_rows(self.leaf_2), 0)
        self.assertEqual(partial.applied_max_rows(self.leaf_2), 9)

    def test_apply_failures(self) -> None:
        """Test failure modes of constructing and applying Join."""
        # Mismatched engines.
        new_engine = iteration.Engine(name="downstream")
        with self.assertRaises(EngineError):
            Join().apply(self.leaf_1.transferred_to(new_engine), self.leaf_2)
        # Predicate requires nonexistent columns.
        predicate = ColumnExpression.reference(tests.ColumnTag("d")).lt(ColumnExpression.literal(0))
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).partial(self.leaf_1).apply(self.leaf_2)
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).partial(self.leaf_2).apply(self.leaf_1)
        # Bounds on columns internally inconsistent.
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.a, self.b}), max_columns=frozenset({self.a}))
        # Minimum columns not satisfied.
        join = Join(min_columns=frozenset({self.a, self.b}))
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_2, self.leaf_1)
        with self.assertRaises(ColumnError):
            join.partial(self.leaf_2)
        with self.assertRaises(ColumnError):
            join.partial(self.leaf_1).apply(self.leaf_2)
        # Common columns not satisfied.
        join = Join(min_columns=frozenset({self.a, self.b}), max_columns=frozenset({self.a, self.b}))
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_2, self.leaf_1)

    def test_apply_simplify(self) -> None:
        """Test Join.apply simplifications."""
        # Joining with the join identity relation returns the other operand
        # itself, regardless of which side the identity is on.
        join_identity = self.engine.make_join_identity_relation()
        self.assertIs(self.leaf_1.join(join_identity), self.leaf_1)
        self.assertIs(join_identity.join(self.leaf_1), self.leaf_1)

    def test_backtracking_apply(self) -> None:
        """Test `PartialJoin.apply` logic that involves reordering operations
        in the existing tree to perform the new operation in a preferred
        engine.
        """
        new_engine = iteration.Engine(name="downstream")
        d = tests.ColumnTag("d")
        expression = ColumnExpression.function(
            "__add__", ColumnExpression.reference(self.a), ColumnExpression.literal(5)
        )
        sort_terms = [SortTerm(ColumnExpression.reference(self.a))]
        predicate = ColumnExpression.reference(self.b).gt(ColumnExpression.literal(0))
        # Apply a bunch of operations in a new engine that a PartialJoin should
        # commute with.
        target = (
            self.leaf_1.transferred_to(new_engine)
            .with_calculated_column(d, expression)
            .with_rows_satisfying(predicate)
            .with_only_columns({self.a, d})
            .sorted(sort_terms)
        )
        # Apply a new PartialJoin with backtracking and see that it appears
        # before the transfer to the new engine, with adjustments as needed.
        relation = target.join(self.leaf_2)
        self.assert_relations_equal(
            relation,
            (
                self.leaf_1.join(self.leaf_2)
                .transferred_to(new_engine)
                .with_calculated_column(d, expression)
                .with_rows_satisfying(predicate)
                .with_only_columns({self.a, self.c, d})
                .sorted(sort_terms)
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test `PartialJoin.apply` logic that handles differing engines
        without reordering operations in the existing tree, as well as failures
        in that reordering.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a Join,
        # because there is a locked Materialization in the way.
        target = self.leaf_1.transferred_to(new_engine).materialized("lock")
        # We can automatically transfer (back) to the new relation's engine.
        self.assert_relations_equal(
            target.join(self.leaf_2, transfer=True),
            target.transferred_to(self.engine).join(self.leaf_2),
        )
        # Can't backtrack through a Deduplication.
        target = self.leaf_1.transferred_to(new_engine).without_duplicates()
        with self.assertRaises(EngineError):
            target.join(self.leaf_2)
        # Can't backtrack through a Slice, because it's order/count dependent.
        target = self.leaf_1.transferred_to(new_engine)[:2]
        with self.assertRaises(EngineError):
            target.join(self.leaf_2)

    def test_common_columns(self) -> None:
        """Test Join.applied_common_columns logic."""
        # A third leaf that shares both ``a`` and ``b`` with ``leaf_1``; it
        # gets its own name so it cannot be confused with the ``leaf_2``
        # fixture.
        leaf_3 = self.engine.make_leaf(
            {self.a, self.b, self.c},
            payload=iteration.RowSequence(
                [{self.a: 0, self.b: 2, self.c: 15}, {self.a: 2, self.b: 4, self.c: 20}]
            ),
            name="leaf_3",
        )
        # With no min or max columns, common_columns is just the intersection
        # of the columns of the operands.
        self.assertEqual(Join().applied_common_columns(self.leaf_1, leaf_3), {self.a, self.b})
        # Check that max_columns is enforced.
        self.assertEqual(
            Join(max_columns=frozenset({self.a})).applied_common_columns(self.leaf_1, leaf_3), {self.a}
        )
        # Check that min_columns is enforced.
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.c})).applied_common_columns(self.leaf_1, leaf_3)
        # Repeat last two checks with min_columns == max_columns.
        self.assertEqual(
            Join(min_columns=frozenset({self.a}), max_columns=frozenset({self.a})).applied_common_columns(
                self.leaf_1, leaf_3
            ),
            {self.a},
        )
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.c}), max_columns=frozenset({self.c})).apply(self.leaf_1, leaf_3)

    def test_str(self) -> None:
        """Test str(Join), str(PartialJoin), and
        str(BinaryOperationRelation[Join]).
        """
        relation = self.leaf_1.join(self.leaf_2)
        self.assertEqual(str(relation), "leaf_1 ⋈ leaf_2")
        partial = Join().partial(self.leaf_1)
        self.assertEqual(str(partial), "⋈[leaf_1]")
        # Nested operations get parentheses, unless they're joins or leaves.
        leaf_3 = self.engine.make_leaf(
            {self.a, self.b},
            payload=iteration.RowSequence([{self.a: 3, self.b: 4}]),
            name="leaf_3",
        )
        self.assertEqual(str(relation.join(leaf_3)), "leaf_1 ⋈ leaf_2 ⋈ leaf_3")
        self.assertEqual(str(self.leaf_1.chain(leaf_3).join(self.leaf_2)), "(leaf_1 ∪ leaf_3) ⋈ leaf_2")

234 

235 

if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()