Coverage for tests/test_slice.py: 8%
120 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 11:35 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 11:35 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import unittest
26from lsst.daf.relation import (
27 ColumnExpression,
28 EngineError,
29 LeafRelation,
30 Relation,
31 Slice,
32 UnaryOperationRelation,
33 iteration,
34 tests,
35)
class SliceTestCase(tests.RelationTestCase):
    """Exercise the `Slice` operation and the relations built from it."""

    def setUp(self) -> None:
        # A single-column, two-row leaf relation in the "preferred" iteration
        # engine; the tests slice this relation or ones derived from it.
        self.a = tests.ColumnTag("a")
        self.engine = iteration.Engine(name="preferred")
        rows = iteration.RowSequence([{self.a: 0}, {self.a: 1}])
        self.leaf = self.engine.make_leaf({self.a}, payload=rows, name="leaf")

    def test_attributes(self) -> None:
        """Verify the UnaryOperation and Relation attributes of sliced
        relations.
        """
        # Each case is (subscript, expected stop, expected limit).  The
        # unbounded slice is checked too, since its min/max row attributes
        # involve a few different logic branches.
        cases: list[tuple[slice, int | None, int | None]] = [
            (slice(1, 2), 2, 1),
            (slice(1, None), None, None),
        ]
        for bounds, expected_stop, expected_limit in cases:
            relation = self.leaf[bounds]
            assert isinstance(relation, UnaryOperationRelation)
            self.assertEqual(relation.columns, {self.a})
            self.assertEqual(relation.engine, self.engine)
            # The leaf has two rows and both slices start at 1, so exactly
            # one row is expected either way.
            self.assertEqual(relation.min_rows, 1)
            self.assertEqual(relation.max_rows, 1)
            self.assertFalse(relation.is_locked)
            operation = relation.operation
            assert isinstance(operation, Slice)
            self.assertEqual(operation.start, 1)
            self.assertEqual(operation.stop, expected_stop)
            self.assertEqual(operation.limit, expected_limit)
            self.assertEqual(operation.columns_required, frozenset())
            self.assertFalse(operation.is_empty_invariant)
            self.assertFalse(operation.is_count_invariant)
            self.assertTrue(operation.is_order_dependent)
            self.assertTrue(operation.is_count_dependent)

    def test_min_max_rows(self) -> None:
        """Check min_rows and max_rows for many combinations of slice bounds
        and input row bounds.
        """
        # iteration.Engine.make_leaf sets min_rows and max_rows based on
        # len(payload), which we don't want here, so the leaves are
        # constructed directly from (min_rows, max_rows) pairs.
        row_bounds: list[tuple[int, int | None]] = [(0, None), (0, 5), (5, 5), (5, 8), (5, None)]
        leaves = [
            LeafRelation(self.engine, frozenset({self.a}), payload=..., min_rows=lower, max_rows=upper)
            for lower, upper in row_bounds
        ]

        # Working out the expected bounds of a sliced relation by hand is
        # error-prone, so the expected values are brute-forced instead by
        # slicing Python range objects of every admissible length.

        def brute_force_row_bounds(
            input_min_rows: int, input_max_rows: int | None, start: int, stop: int | None
        ) -> tuple[int, int | None]:
            """Compute the minimum and maximum number of rows a sequence could
            have after slicing.

            Parameters
            ----------
            input_min_rows, input_max_rows : `int` or `None`
                Original bounds on the number of rows.
            start, stop : `int` or `None`
                Slice parameters.

            Returns
            -------
            output_min_rows, output_max_rows : `int` or `None`
                Bounds on the number of rows for the sliced sequence.

            Notes
            -----
            This is only a test helper, so an unbounded input (`None`) is
            emulated with an upper bound of 100, and an output maximum above
            50 is reported as `None`.  Keep all concrete integers below 100
            to avoid problems.
            """
            if input_max_rows is None:
                lower, upper = brute_force_row_bounds(input_min_rows, 100, start, stop)
                if upper is not None and upper > 50:
                    upper = None
                return lower, upper
            window = slice(start, stop)
            sizes = [len(range(n_rows)[window]) for n_rows in range(input_min_rows, input_max_rows + 1)]
            return min(sizes), max(sizes)

        def check(leaf: Relation) -> None:
            """Slice the given leaf with start and stop values just below,
            equal to, and just above its min and max rows (plus zero and an
            open-ended stop) and compare against the brute-force bounds.
            """
            breaks = {0, leaf.min_rows - 1, leaf.min_rows, leaf.min_rows + 1}
            if leaf.max_rows is not None:
                breaks |= {leaf.max_rows - 1, leaf.max_rows, leaf.max_rows + 1}
            starts = sorted(breaks)
            stops: list[int | None] = [*starts, None]
            for start in starts:
                for stop in stops:
                    if start < 0 or (stop is not None and stop < start):
                        # A negative start, or a stop before the start, is
                        # expected to be rejected when the Slice is built.
                        with self.assertRaises(ValueError):
                            Slice(start, stop)
                        continue
                    relation = leaf[slice(start, stop)]
                    self.assertEqual(
                        (relation.min_rows, relation.max_rows),
                        brute_force_row_bounds(leaf.min_rows, leaf.max_rows, start, stop),
                        msg=(
                            f"leaf.min_rows={leaf.min_rows}, "
                            f"leaf.max_rows={leaf.max_rows}, "
                            f"slice=[{start}:{stop}]"
                        ),
                    )

        for leaf in leaves:
            check(leaf)

    def test_backtracking_apply(self) -> None:
        """Test apply logic that reorders operations in the existing tree so
        the new operation is performed in a preferred engine.
        """
        new_engine = iteration.Engine(name="downstream")
        b = tests.ColumnTag("b")
        expression = ColumnExpression.function(
            "__add__", ColumnExpression.reference(self.a), ColumnExpression.literal(5)
        )
        # Build a chain of operations in the new engine that a Slice should
        # commute with.
        transferred = self.leaf.transferred_to(new_engine)
        target = transferred.with_calculated_column(b, expression).with_only_columns({b})
        # Applying a Slice with backtracking should place it before the
        # transfer to the new engine.
        relation = Slice(start=1, stop=3).apply(
            target, preferred_engine=self.engine, require_preferred_engine=True
        )
        expected = (
            self.leaf[1:3]
            .transferred_to(new_engine)
            .with_calculated_column(b, expression)
            .with_only_columns({b})
        )
        self.assert_relations_equal(relation, expected)

    def test_no_backtracking(self) -> None:
        """Test apply logic that handles preferred engines without reordering
        operations in the existing tree.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a
        # Slice, because there is a locked Materialization in the way.
        target = self.leaf.transferred_to(new_engine).materialized("lock")
        # With no backtracking possible and nothing else enabled, the
        # preferred engine is simply ignored.
        with_preference = Slice(start=1, stop=3).apply(target, preferred_engine=self.engine)
        without_preference = Slice(start=1, stop=3).apply(target)
        self.assert_relations_equal(with_preference, without_preference)
        # Requiring the preferred engine turns that situation into an error.
        with self.assertRaises(EngineError):
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, require_preferred_engine=True)
        # Alternatively, a transfer (back) to the preferred engine can be
        # inserted automatically.
        auto_transferred = Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, transfer=True)
        self.assert_relations_equal(auto_transferred, target.transferred_to(self.engine)[1:3])
        # Can't backtrack through anything other than a Projection or
        # a Calculation.
        target = self.leaf.transferred_to(new_engine).without_duplicates()
        with self.assertRaises(EngineError):
            Slice(start=1, stop=3).apply(target, preferred_engine=self.engine, require_preferred_engine=True)

    def test_apply_simplify(self) -> None:
        """Test simplification logic in Slice.apply."""
        # Applying a Slice on top of an existing Slice should merge them.
        # Each case is (first subscript, second subscript, merged subscript).
        merge_cases: list[tuple[slice, slice, slice]] = [
            (slice(1, None), slice(None, 2), slice(1, 3)),  # [1:][:2] == [1:3]
            (slice(1, None), slice(1, None), slice(2, None)),  # [1:][1:] == [2:]
            (slice(1, 3), slice(1, 2), slice(2, 3)),  # [1:3][1:2] == [2:3]
            (slice(1, 3), slice(1, None), slice(2, 3)),  # [1:3][1:] == [2:3]
        ]
        for first, second, merged in merge_cases:
            self.assert_relations_equal(self.leaf[first][second], self.leaf[merged])
        # A no-op slice should leave the relation unchanged.
        self.assert_relations_equal(self.leaf[:], self.leaf)

    def test_iteration(self) -> None:
        """Test Slice execution in the iteration engine."""
        # Each case is (subscript, expected values of column ``a``).
        cases: list[tuple[slice, list[int]]] = [
            (slice(1, None), [1]),
            (slice(None, 1), [0]),
            (slice(1, 2), [1]),
            (slice(2, None), []),
            (slice(2, 3), []),
        ]
        for window, values in cases:
            self.assertEqual(
                list(self.engine.execute(self.leaf[window])),
                [{self.a: value} for value in values],
            )
        # Also try a non-leaf target, since that's a different code branch in
        # the iteration engine.  Column ``b`` is a copy of column ``a``.
        b = tests.ColumnTag("b")
        target = self.leaf.with_calculated_column(b, ColumnExpression.reference(self.a))
        for window, values in cases:
            self.assertEqual(
                list(self.engine.execute(target[window])),
                [{self.a: value, b: value} for value in values],
            )

    def test_str(self) -> None:
        """Test the string form of a relation whose operation is a Slice."""
        sliced = self.leaf[1:2]
        self.assertEqual(str(sliced), f"slice[1:2]({self.leaf})")
if __name__ == "__main__":
    # Run the test cases in this module when it is executed as a script.
    unittest.main()