Coverage for tests/test_slice.py: 8%

120 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-21 09:39 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import unittest 

25 

26from lsst.daf.relation import ( 

27 ColumnExpression, 

28 EngineError, 

29 LeafRelation, 

30 Relation, 

31 Slice, 

32 UnaryOperationRelation, 

33 iteration, 

34 tests, 

35) 

36 

37 

class SliceTestCase(tests.RelationTestCase):
    """Tests for the Slice operation and relations based on it."""

    def setUp(self) -> None:
        """Create the shared fixtures: a column tag ``a``, an iteration engine
        named "preferred", and a two-row leaf relation with ``a`` values 0
        and 1.
        """
        self.a = tests.ColumnTag("a")
        self.engine = iteration.Engine(name="preferred")
        self.leaf = self.engine.make_leaf(
            {self.a}, payload=iteration.RowSequence([{self.a: 0}, {self.a: 1}]), name="leaf"
        )

    def test_attributes(self) -> None:
        """Check that all UnaryOperation and Relation attributes have the
        expected values.
        """
        relation = self.leaf[1:2]
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, {self.a})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 1)
        self.assertEqual(relation.max_rows, 1)
        self.assertFalse(relation.is_locked)
        operation = relation.operation
        assert isinstance(operation, Slice)
        self.assertEqual(operation.start, 1)
        self.assertEqual(operation.stop, 2)
        self.assertEqual(operation.limit, 1)
        self.assertEqual(operation.columns_required, frozenset())
        self.assertFalse(operation.is_empty_invariant)
        self.assertFalse(operation.is_count_invariant)
        self.assertTrue(operation.is_order_dependent)
        self.assertTrue(operation.is_count_dependent)
        # Also check the min/max attributes of an unbounded Slice, since that
        # involves a few different logic branches.
        relation = self.leaf[1:]
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, {self.a})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 1)
        self.assertEqual(relation.max_rows, 1)
        self.assertFalse(relation.is_locked)
        operation = relation.operation
        assert isinstance(operation, Slice)
        self.assertEqual(operation.start, 1)
        self.assertEqual(operation.stop, None)
        self.assertEqual(operation.limit, None)
        self.assertEqual(operation.columns_required, frozenset())
        self.assertFalse(operation.is_empty_invariant)
        self.assertFalse(operation.is_count_invariant)
        self.assertTrue(operation.is_order_dependent)
        self.assertTrue(operation.is_count_dependent)

    def test_min_max_rows(self) -> None:
        """Test min_rows and max_rows for different kinds of slices
        and original min/max rows.
        """
        # iteration.Engine.make_leaf sets min_rows and max_rows based on
        # len(payload), which we don't want here.
        leaf1 = LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=0, max_rows=None)
        leaf2 = LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=0, max_rows=5)
        leaf3 = LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=5, max_rows=5)
        leaf4 = LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=5, max_rows=8)
        leaf5 = LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=5, max_rows=None)

        # Reasoning about the expected values of slice operations is really
        # easy to get wrong, so instead we brute-force the expected values,
        # ultimately delegating to Python's own implementation of slicing
        # range objects.

        def brute_force_row_bounds(
            input_min_rows: int, input_max_rows: int | None, start: int, stop: int | None
        ) -> tuple[int, int | None]:
            """Compute the minimum and maximum number of rows a sequence could
            have after slicing.

            Parameters
            ----------
            input_min_rows, input_max_rows : `int` or `None`
                Original bounds on the number of rows.
            start, stop : `int` or `None`
                Slice parameters.

            Returns
            -------
            output_min_rows, output_max_rows : `int` or `None`
                Bounds on the number of rows for the sliced sequence.

            Notes
            -----
            Since this is just a test helper, we handle `None` by assuming it
            can be replaced by a large value and that large values in the
            results indicate a `None` result.  Keep all concrete integers below
            100 to avoid problems.
            """
            if input_max_rows is None:
                # Stand in 100 for "unbounded", then map any large upper bound
                # in the result back to `None`.
                output_min_rows, output_max_rows = brute_force_row_bounds(input_min_rows, 100, start, stop)
                if output_max_rows is not None and output_max_rows > 50:
                    output_max_rows = None
                return output_min_rows, output_max_rows
            # Slice a range of every allowed input length and record the
            # length of each result.
            sizes = [
                len(range(n_rows)[slice(start, stop)]) for n_rows in range(input_min_rows, input_max_rows + 1)
            ]
            return min(sizes), max(sizes)

        def check(leaf: Relation) -> None:
            """Run tests on the given leaf relation by applying slices with
            a number of start and stop values that are just above, just below,
            or equal to its min and max rows.
            """
            breaks_set = {0, leaf.min_rows - 1, leaf.min_rows, leaf.min_rows + 1}
            if leaf.max_rows is not None:
                breaks_set.update({leaf.max_rows - 1, leaf.max_rows, leaf.max_rows + 1})
            breaks_list = sorted(breaks_set)
            # `None` is only meaningful as a stop value (an unbounded slice).
            stops: list[int | None] = [*breaks_list, None]
            for start in breaks_list:
                for stop in stops:
                    # Report each combination separately so that one failure
                    # does not hide the others.
                    with self.subTest(
                        min_rows=leaf.min_rows, max_rows=leaf.max_rows, start=start, stop=stop
                    ):
                        if start < 0 or (stop is not None and stop < start):
                            # Negative starts and stop < start are expected to
                            # be rejected at construction.
                            with self.assertRaises(ValueError):
                                Slice(start, stop)
                        else:
                            relation = leaf[slice(start, stop)]
                            self.assertEqual(
                                (relation.min_rows, relation.max_rows),
                                brute_force_row_bounds(leaf.min_rows, leaf.max_rows, start, stop),
                                msg=(
                                    f"leaf.min_rows={leaf.min_rows}, "
                                    f"leaf.max_rows={leaf.max_rows}, "
                                    f"slice=[{start}:{stop}]"
                                ),
                            )

        check(leaf1)
        check(leaf2)
        check(leaf3)
        check(leaf4)
        check(leaf5)

    def test_backtracking_apply(self) -> None:
        """Test apply logic that involves reordering operations in the existing
        tree to perform the new operation in a preferred engine.
        """
        new_engine = iteration.Engine(name="downstream")
        b = tests.ColumnTag("b")
        expression = ColumnExpression.function(
            "__add__", ColumnExpression.reference(self.a), ColumnExpression.literal(5)
        )
        # Apply operations in a new engine that a Slice should commute with.
        target = (
            self.leaf.transferred_to(new_engine).with_calculated_column(b, expression).with_only_columns({b})
        )
        # Apply a new Slice with backtracking and see that it appears
        # before the transfer to the new engine.
        relation = Slice(start=1, stop=3).apply(
            target, preferred_engine=self.engine, require_preferred_engine=True
        )
        self.assert_relations_equal(
            relation,
            (
                self.leaf[1:3]
                .transferred_to(new_engine)
                .with_calculated_column(b, expression)
                .with_only_columns({b})
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test apply logic that handles preferred engines without reordering
        operations in the existing tree.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a
        # Slice, because there is a locked Materialization in the way.
        target = self.leaf.transferred_to(new_engine).materialized("lock")
        # Preferred engine is ignored if we can't backtrack and don't enable
        # anything else.
        self.assert_relations_equal(
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine),
            Slice(start=1, stop=3).apply(target),
        )
        # We can force this to be an error.
        with self.assertRaises(EngineError):
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, require_preferred_engine=True)
        # We can also automatically transfer (back) to the preferred engine.
        self.assert_relations_equal(
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, transfer=True),
            target.transferred_to(self.engine)[1:3],
        )
        # Can't backtrack through anything other than a Projection or
        # a Calculation.
        target = self.leaf.transferred_to(new_engine).without_duplicates()
        with self.assertRaises(EngineError):
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, require_preferred_engine=True)

    def test_apply_simplify(self) -> None:
        """Test simplification logic in Slice.apply."""
        # Test that applying a Slice to an existing Slice merges them.
        self.assert_relations_equal(self.leaf[1:][:2], self.leaf[1:3])
        self.assert_relations_equal(self.leaf[1:][1:], self.leaf[2:])
        self.assert_relations_equal(self.leaf[1:3][1:2], self.leaf[2:3])
        self.assert_relations_equal(self.leaf[1:3][1:], self.leaf[2:3])
        # Test that a no-op slice does nothing.
        self.assert_relations_equal(self.leaf[:], self.leaf)

    def test_iteration(self) -> None:
        """Test Slice execution in the iteration engine."""
        self.assertEqual(list(self.engine.execute(self.leaf[1:])), [{self.a: 1}])
        self.assertEqual(list(self.engine.execute(self.leaf[:1])), [{self.a: 0}])
        self.assertEqual(list(self.engine.execute(self.leaf[1:2])), [{self.a: 1}])
        self.assertEqual(list(self.engine.execute(self.leaf[2:])), [])
        self.assertEqual(list(self.engine.execute(self.leaf[2:3])), [])
        # Also try a non-leaf target, since that's a different code branch in
        # the iteration engine.
        b = tests.ColumnTag("b")
        target = self.leaf.with_calculated_column(b, ColumnExpression.reference(self.a))
        self.assertEqual(list(self.engine.execute(target[1:])), [{self.a: 1, b: 1}])
        self.assertEqual(list(self.engine.execute(target[:1])), [{self.a: 0, b: 0}])
        self.assertEqual(list(self.engine.execute(target[1:2])), [{self.a: 1, b: 1}])
        self.assertEqual(list(self.engine.execute(target[2:])), [])
        self.assertEqual(list(self.engine.execute(target[2:3])), [])

    def test_str(self) -> None:
        """Test str(Slice) and
        str(UnaryOperationRelation[Slice]).
        """
        relation = self.leaf[1:2]
        self.assertEqual(str(relation), f"slice[1:2]({self.leaf})")

267 

268 

# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()