Coverage for tests/test_sort.py: 16%
71 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 09:29 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 09:29 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import unittest
26import numpy as np
27from lsst.daf.relation import (
28 ColumnError,
29 ColumnExpression,
30 EngineError,
31 Sort,
32 SortTerm,
33 UnaryOperationRelation,
34 iteration,
35 tests,
36)
class SortTestCase(tests.RelationTestCase):
    """Exercise the Sort operation and the relations built from it."""

    def setUp(self) -> None:
        # Four columns named a-d, each identified by a test ColumnTag.
        self.columns = {name: tests.ColumnTag(name) for name in "abcd"}
        # One sort term per column, in column order; only "c" is descending.
        directions = (("a", True), ("b", True), ("c", False), ("d", True))
        self.sort_terms = tuple(
            SortTerm(ColumnExpression.reference(self.columns[name]), ascending=ascending)
            for name, ascending in directions
        )
        self.engine = iteration.Engine(name="preferred")
        # A fixed seed keeps the table reproducible.  Each column holds 32
        # values drawn from only four integers, so ties within a column are
        # unavoidable.
        rng = np.random.RandomState(1)
        self.table = np.zeros(32, dtype=[(name, int) for name in self.columns])
        for name in self.columns:
            self.table[name] = rng.randint(0, 4, size=32)
        self.leaf = self.engine.make_leaf(
            frozenset(self.columns.values()),
            payload=iteration.RowSequence(self._as_rows(self.table)),
            name="leaf",
        )

    def _as_rows(self, table: np.ndarray) -> list:
        """Convert a structured array with fields named like ``self.columns``
        into a list of dictionaries keyed by the corresponding column tags.
        """
        return [{tag: record[name] for name, tag in self.columns.items()} for record in table]

    def test_attributes(self) -> None:
        """Verify the attributes of a sorted relation and of its Sort
        operation.
        """
        all_tags = frozenset(self.columns.values())
        relation = self.leaf.sorted(self.sort_terms)
        # Plain asserts (not assertIsInstance) so static type checkers narrow
        # the type for the attribute accesses below.
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, all_tags)
        self.assertEqual(relation.engine, self.engine)
        # The row-count bounds are expected to match those of the leaf.
        self.assertEqual(relation.min_rows, self.leaf.min_rows)
        self.assertEqual(relation.max_rows, self.leaf.max_rows)
        operation = relation.operation
        assert isinstance(operation, Sort)
        self.assertEqual(operation.terms, self.sort_terms)
        self.assertEqual(operation.columns_required, all_tags)
        self.assertTrue(operation.is_empty_invariant)
        self.assertTrue(operation.is_count_invariant)
        self.assertFalse(operation.is_order_dependent)
        self.assertFalse(operation.is_count_dependent)

    def test_apply_failures(self) -> None:
        """Verify that invalid Sorts are rejected when applied."""
        # Sorting on a column the target does not have must fail.
        with self.assertRaises(ColumnError):
            unknown = ColumnExpression.reference(tests.ColumnTag("e"))
            self.leaf.sorted([SortTerm(unknown)])

    def test_apply_simplify(self) -> None:
        """Verify the simplifications performed when a Sort is applied."""
        # Sorting an already-sorted relation merges the two Sorts, with the
        # newer terms taking precedence over the older ones.
        stacked = self.leaf.sorted(self.sort_terms[2:4]).sorted(self.sort_terms[0:2])
        combined = self.leaf.sorted(self.sort_terms)
        self.assert_relations_equal(stacked, combined)
        # A Sort with no terms leaves the target untouched.
        unsorted = self.leaf.sorted([])
        self.assert_relations_equal(unsorted, self.leaf)

    def test_backtracking_apply(self) -> None:
        """Verify that a Sort can be inserted upstream of existing operations
        so that it runs in a preferred engine.
        """
        downstream_engine = iteration.Engine(name="downstream")
        a_plus_b = ColumnExpression.function(
            "__add__",
            ColumnExpression.reference(self.columns["a"]),
            ColumnExpression.reference(self.columns["b"]),
        )
        c_gt_d = ColumnExpression.reference(self.columns["c"]).gt(
            ColumnExpression.reference(self.columns["d"])
        )
        extra_tag = tests.ColumnTag("e")
        # Build a chain of operations in the downstream engine, all of which
        # a Sort should commute with.
        target = (
            self.leaf.transferred_to(downstream_engine)
            .with_calculated_column(extra_tag, a_plus_b)
            .with_rows_satisfying(c_gt_d)
            .without_duplicates()
            .with_only_columns(frozenset(self.columns.values()))
        )
        # Requiring the preferred engine forces the Sort to be placed before
        # the transfer, directly on top of the leaf.
        relation = target.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)
        sorted_leaf = self.leaf.sorted(
            self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True
        )
        expected = (
            sorted_leaf.transferred_to(downstream_engine)
            .with_calculated_column(extra_tag, a_plus_b)
            .with_rows_satisfying(c_gt_d)
            .without_duplicates()
            .with_only_columns(frozenset(self.columns.values()))
        )
        self.assert_relations_equal(relation, expected)

    def test_no_backtracking(self) -> None:
        """Verify preferred-engine handling when the Sort cannot be moved
        upstream of the existing operations.
        """
        downstream_engine = iteration.Engine(name="downstream")
        # A locked Materialization blocks reordering, so a Sort cannot be
        # inserted before the transfer.
        target = self.leaf.transferred_to(downstream_engine).materialized("lock")
        # With no other options enabled, an unreachable preferred engine is
        # silently ignored.
        with_preference = target.sorted(self.sort_terms, preferred_engine=self.engine)
        without_preference = target.sorted(self.sort_terms)
        self.assert_relations_equal(with_preference, without_preference)
        # Requiring the preferred engine turns that into an error.
        with self.assertRaises(EngineError):
            target.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)
        # Alternatively, a transfer (back) to the preferred engine can be
        # inserted automatically.
        auto_transferred = target.sorted(self.sort_terms, preferred_engine=self.engine, transfer=True)
        manually_transferred = target.transferred_to(self.engine).sorted(self.sort_terms)
        self.assert_relations_equal(auto_transferred, manually_transferred)
        # Can't backtrack through a Calculation that provides a column the
        # Sort needs.  In the future this could be made possible by subsuming
        # the calculated columns into the sort terms.
        extra_tag = tests.ColumnTag("e")
        target = self.leaf.transferred_to(downstream_engine).with_calculated_column(
            extra_tag, ColumnExpression.reference(self.columns["a"])
        )
        with self.assertRaises(EngineError):
            target.sorted(
                [SortTerm(ColumnExpression.reference(extra_tag))],
                preferred_engine=self.engine,
                require_preferred_engine=True,
            )
        # Can't backtrack through a slice either.
        target = self.leaf.transferred_to(downstream_engine)[1:3]
        with self.assertRaises(EngineError):
            target.sorted(self.sort_terms, preferred_engine=self.engine, require_preferred_engine=True)

    def test_iteration(self) -> None:
        """Verify that the iteration engine executes a Sort correctly."""
        relation = self.leaf.sorted(self.sort_terms)
        expected = self.table.copy()
        # Negate "c" so an ascending numpy sort yields descending order for
        # that column, then apply stable sorts from the least significant key
        # to the most significant one.
        expected["c"] *= -1
        for key in ("d", "c", ["a", "b"]):
            expected.sort(kind="stable", order=key)
        # Undo the negation to recover the original values.
        expected["c"] *= -1
        self.assertEqual(
            list(self.engine.execute(relation)),
            self._as_rows(expected),
        )

    def test_str(self) -> None:
        """Verify the string form of a relation with a Sort operation."""
        relation = self.leaf.sorted(self.sort_terms)
        # Descending terms are rendered with a leading minus sign.
        self.assertEqual(str(relation), "sort[a, b, -c, d](leaf)")
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()