Coverage for tests/test_sort.py: 15%

69 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-19 09:55 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import unittest 

25 

26import numpy as np 

27from lsst.daf.relation import ( 

28 ColumnError, 

29 ColumnExpression, 

30 EngineError, 

31 Sort, 

32 SortTerm, 

33 UnaryOperationRelation, 

34 iteration, 

35 tests, 

36) 

37 

38 

class SortTestCase(tests.RelationTestCase):
    """Exercise the Sort operation and the relations built from it."""

    def setUp(self) -> None:
        # One column tag per letter; the a, b, c, d insertion order is the
        # order in which the seeded random values are drawn below.
        self.columns = {name: tests.ColumnTag(name) for name in "abcd"}
        # Sort on a and b ascending, then c descending, then d ascending.
        directions = (("a", True), ("b", True), ("c", False), ("d", True))
        self.sort_terms = tuple(
            SortTerm(ColumnExpression.reference(self.columns[name]), ascending=ascending)
            for name, ascending in directions
        )
        self.engine = iteration.Engine(name="preferred")
        # A fixed seed keeps the generated rows reproducible between runs.
        rng = np.random.RandomState(1)
        n_rows = 32
        self.table = np.zeros(n_rows, dtype=[(name, int) for name in self.columns])
        for name in self.columns:
            self.table[name] = rng.randint(0, 4, size=n_rows)
        # The leaf payload holds one {tag: value} mapping per table record.
        rows = [{tag: record[name] for name, tag in self.columns.items()} for record in self.table]
        self.leaf = self.engine.make_leaf(
            frozenset(self.columns.values()),
            payload=iteration.RowSequence(rows),
            name="leaf",
        )

    def test_attributes(self) -> None:
        """Verify the attributes of a sorted relation and of its Sort
        operation.
        """
        all_columns = frozenset(self.columns.values())
        relation = self.leaf.sorted(self.sort_terms)
        assert isinstance(relation, UnaryOperationRelation)
        # Relation-level attributes: sorting keeps columns, engine and row
        # bounds of the leaf.
        self.assertEqual(relation.columns, all_columns)
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, self.leaf.min_rows)
        self.assertEqual(relation.max_rows, self.leaf.max_rows)
        # Operation-level attributes.
        sort = relation.operation
        assert isinstance(sort, Sort)
        self.assertEqual(sort.terms, self.sort_terms)
        self.assertEqual(sort.columns_required, all_columns)
        self.assertTrue(sort.is_empty_invariant)
        self.assertTrue(sort.is_count_invariant)
        self.assertFalse(sort.is_order_dependent)
        self.assertFalse(sort.is_count_dependent)

    def test_apply_failures(self) -> None:
        """Check the ways in which building or applying a Sort can fail."""
        # "e" is not one of the leaf's columns, so sorting on it is an error.
        with self.assertRaises(ColumnError):
            self.leaf.sorted([SortTerm(ColumnExpression.reference(tests.ColumnTag("e")))])

    def test_apply_simplify(self) -> None:
        """Check the simplifications performed when a Sort is applied."""
        leading = self.sort_terms[0:2]
        trailing = self.sort_terms[2:4]
        # Sorting an already-sorted relation merges the two Sorts into one.
        self.assert_relations_equal(
            self.leaf.sorted(trailing).sorted(leading),
            self.leaf.sorted(self.sort_terms),
        )
        # A Sort with no terms leaves the relation untouched.
        self.assert_relations_equal(self.leaf.sorted([]), self.leaf)

    def test_backtracking_apply(self) -> None:
        """Check that a Sort can be inserted earlier in an existing tree so
        that it runs in the preferred engine.
        """
        downstream = iteration.Engine(name="downstream")
        a_plus_b = ColumnExpression.function(
            "__add__",
            ColumnExpression.reference(self.columns["a"]),
            ColumnExpression.reference(self.columns["b"]),
        )
        c_gt_d = ColumnExpression.reference(self.columns["c"]).gt(
            ColumnExpression.reference(self.columns["d"])
        )
        e = tests.ColumnTag("e")
        # Stack up operations in the downstream engine that a Sort is
        # expected to commute with.
        target = (
            self.leaf.transferred_to(downstream)
            .with_calculated_column(e, a_plus_b)
            .with_rows_satisfying(c_gt_d)
            .without_duplicates()
            .with_only_columns(frozenset(self.columns.values()))
        )
        # Requiring the preferred engine should place the Sort ahead of the
        # transfer, with the same downstream operations applied afterwards.
        relation = target.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)
        expected = (
            self.leaf.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)
            .transferred_to(downstream)
            .with_calculated_column(e, a_plus_b)
            .with_rows_satisfying(c_gt_d)
            .without_duplicates()
            .with_only_columns(frozenset(self.columns.values()))
        )
        self.assert_relations_equal(relation, expected)

    def test_no_backtracking(self) -> None:
        """Check preferred-engine handling when the existing tree cannot be
        reordered.
        """
        downstream = iteration.Engine(name="downstream")
        # A Materialization (named "lock") sits between the leaf and the
        # point where the Sort is inserted, so the tree cannot be reordered.
        locked = self.leaf.transferred_to(downstream).materialized("lock")
        # With no other options enabled, the preferred engine is ignored.
        self.assert_relations_equal(
            locked.sorted(self.sort_terms, preferred_engine=self.engine),
            locked.sorted(self.sort_terms),
        )
        # Requiring the preferred engine turns the same situation into an
        # error.
        with self.assertRaises(EngineError):
            locked.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)
        # Alternatively, a transfer (back) to the preferred engine can be
        # added automatically.
        self.assert_relations_equal(
            locked.sorted(self.sort_terms, preferred_engine=self.engine, transfer=True),
            locked.transferred_to(self.engine).sorted(self.sort_terms),
        )
        # A Sort on a calculated column cannot move ahead of the Calculation
        # that provides that column.  (A future improvement could fold the
        # calculated expression into the sort terms.)
        e = tests.ColumnTag("e")
        calculated = self.leaf.transferred_to(downstream).with_calculated_column(
            e, ColumnExpression.reference(self.columns["a"])
        )
        with self.assertRaises(EngineError):
            calculated.sorted(
                [SortTerm(ColumnExpression.reference(e))],
                preferred_engine=self.engine,
                require_preferred_engine=True,
            )
        # A Sort cannot move ahead of a slice either.
        sliced = self.leaf.transferred_to(downstream)[1:3]
        with self.assertRaises(EngineError):
            sliced.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)

    def test_iteration(self) -> None:
        """Check that the iteration engine executes a Sort correctly."""
        relation = self.leaf.sorted(self.sort_terms)
        # Build the expected order with numpy: negate c so an ascending sort
        # orders it descending, sort on the keys from least to most
        # significant, then restore the original sign of c.
        expected = self.table.copy()
        expected["c"] *= -1
        for key in ("d", "c", ["a", "b"]):
            expected.sort(kind="stable", order=key)
        expected["c"] *= -1
        expected_rows = [{tag: record[name] for name, tag in self.columns.items()} for record in expected]
        self.assertEqual(list(self.engine.execute(relation)), expected_rows)

    def test_str(self) -> None:
        """Check the string form of a Sort and of the relation that wraps
        it.
        """
        sorted_leaf = self.leaf.sorted(self.sort_terms)
        # The descending term (c) is rendered with a leading minus sign.
        self.assertEqual(str(sorted_leaf), "sort[a, b, -c, d](leaf)")

196 

197 

if __name__ == "__main__":
    # Run the tests in this module when it is executed as a script.
    unittest.main()