Coverage for tests/test_join.py: 9%
102 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 10:42 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 10:42 +0000
# This file is part of daf_relation.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import unittest
26from lsst.daf.relation import (
27 BinaryOperationRelation,
28 ColumnError,
29 ColumnExpression,
30 EngineError,
31 Join,
32 Predicate,
33 SortTerm,
34 iteration,
35 tests,
36)
class JoinTestCase(tests.RelationTestCase):
    """Tests for the Join operation and relations based on it."""

    def setUp(self) -> None:
        """Build the shared fixtures: three column tags, one iteration engine,
        and two three-row leaf relations whose only common column is ``a``.
        """
        self.a = tests.ColumnTag("a")
        self.b = tests.ColumnTag("b")
        self.c = tests.ColumnTag("c")
        self.engine = iteration.Engine(name="preferred")
        self.leaf_1 = self.engine.make_leaf(
            {self.a, self.b},
            payload=iteration.RowSequence(
                [{self.a: 0, self.b: 5}, {self.a: 1, self.b: 10}, {self.a: 2, self.b: 25}]
            ),
            name="leaf_1",
        )
        # Every row is keyed by the declared columns of the leaf ({a, c}).
        self.leaf_2 = self.engine.make_leaf(
            {self.a, self.c},
            payload=iteration.RowSequence(
                [{self.a: 0, self.c: 15}, {self.a: 2, self.c: 20}, {self.a: 3, self.c: 0}]
            ),
            name="leaf_2",
        )

    def test_attributes(self) -> None:
        """Check that all Relation and PartialJoin attributes have the expected
        values.
        """
        relation = self.leaf_1.join(self.leaf_2)
        # Plain asserts (rather than assertIsInstance) so static type checkers
        # narrow the type for the attribute accesses that follow.
        assert isinstance(relation, BinaryOperationRelation)
        self.assertEqual(relation.columns, {self.a, self.b, self.c})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 0)
        # Both operands have three rows, so the upper bound is 3 * 3.
        self.assertEqual(relation.max_rows, 9)
        self.assertFalse(relation.is_locked)
        operation = relation.operation
        assert isinstance(operation, Join)
        self.assertEqual(operation.min_columns, {self.a})
        self.assertEqual(operation.max_columns, {self.a})
        self.assertEqual(operation.common_columns, {self.a})
        self.assertEqual(operation.predicate, Predicate.literal(True))
        partial = Join().partial(self.leaf_1)
        self.assertEqual(partial.columns_required, frozenset())
        self.assert_relations_equal(partial.fixed, self.leaf_1)
        self.assertFalse(partial.is_count_dependent)
        self.assertFalse(partial.is_order_dependent)
        self.assertFalse(partial.is_count_invariant)
        self.assertFalse(partial.is_empty_invariant)
        self.assertEqual(partial.applied_columns(self.leaf_2), {self.a, self.b, self.c})
        self.assertEqual(partial.applied_min_rows(self.leaf_2), 0)
        self.assertEqual(partial.applied_max_rows(self.leaf_2), 9)

    def test_apply_failures(self) -> None:
        """Test failure modes of constructing and applying Join."""
        # Mismatched engines.
        new_engine = iteration.Engine(name="downstream")
        with self.assertRaises(EngineError):
            Join().apply(self.leaf_1.transferred_to(new_engine), self.leaf_2)
        # Predicate requires nonexistent columns.
        predicate = ColumnExpression.reference(tests.ColumnTag("d")).lt(ColumnExpression.literal(0))
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).partial(self.leaf_1).apply(self.leaf_2)
        with self.assertRaises(ColumnError):
            Join(predicate=predicate).partial(self.leaf_2).apply(self.leaf_1)
        # Bounds on columns internally inconsistent.
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.a, self.b}), max_columns=frozenset({self.a}))
        # Minimum columns not satisfied.
        join = Join(min_columns=frozenset({self.a, self.b}))
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_2, self.leaf_1)
        with self.assertRaises(ColumnError):
            join.partial(self.leaf_2)
        with self.assertRaises(ColumnError):
            join.partial(self.leaf_1).apply(self.leaf_2)
        # Common columns not satisfied.
        join = Join(min_columns=frozenset({self.a, self.b}), max_columns=frozenset({self.a, self.b}))
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_1, self.leaf_2)
        with self.assertRaises(ColumnError):
            join.apply(self.leaf_2, self.leaf_1)

    def test_apply_simplify(self) -> None:
        """Test Join.apply simplifications."""
        join_identity = self.engine.make_join_identity_relation()
        # Joining with the join-identity relation on either side must hand
        # back the very same operand object, not merely an equal relation.
        self.assertIs(self.leaf_1.join(join_identity), self.leaf_1)
        self.assertIs(join_identity.join(self.leaf_1), self.leaf_1)

    def test_backtracking_apply(self) -> None:
        """Test `PartialJoin.apply` logic that involves reordering operations
        in the existing tree to perform the new operation in a preferred
        engine.
        """
        new_engine = iteration.Engine(name="downstream")
        d = tests.ColumnTag("d")
        expression = ColumnExpression.function(
            "__add__", ColumnExpression.reference(self.a), ColumnExpression.literal(5)
        )
        sort_terms = [SortTerm(ColumnExpression.reference(self.a))]
        predicate = ColumnExpression.reference(self.b).gt(ColumnExpression.literal(0))
        # Apply a bunch of operations in a new engine that a PartialJoin should
        # commute with.
        target = (
            self.leaf_1.transferred_to(new_engine)
            .with_calculated_column(d, expression)
            .with_rows_satisfying(predicate)
            .with_only_columns({self.a, d})
            .sorted(sort_terms)
        )
        # Apply a new PartialJoin with backtracking and see that it appears
        # before the transfer to the new engine, with adjustments as needed.
        relation = target.join(self.leaf_2)
        self.assert_relations_equal(
            relation,
            (
                self.leaf_1.join(self.leaf_2)
                .transferred_to(new_engine)
                .with_calculated_column(d, expression)
                .with_rows_satisfying(predicate)
                .with_only_columns({self.a, self.c, d})
                .sorted(sort_terms)
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test `PartialJoin.apply` logic that handles differing engines
        without reordering operations in the existing tree, as well as failures
        in that reordering.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a Join,
        # because there is a locked Materialization in the way.
        target = self.leaf_1.transferred_to(new_engine).materialized("lock")
        # We can automatically transfer (back) to the new relation's engine.
        self.assert_relations_equal(
            target.join(self.leaf_2, transfer=True),
            target.transferred_to(self.engine).join(self.leaf_2),
        )
        # Can't backtrack through a Deduplication.
        target = self.leaf_1.transferred_to(new_engine).without_duplicates()
        with self.assertRaises(EngineError):
            target.join(self.leaf_2)
        # Can't backtrack through a Slice, because it's order/count dependent.
        target = self.leaf_1.transferred_to(new_engine)[:2]
        with self.assertRaises(EngineError):
            target.join(self.leaf_2)

    def test_common_columns(self) -> None:
        """Test Join.applied_common_columns logic."""
        # A third leaf that shares both ``a`` and ``b`` with leaf_1.
        leaf_3 = self.engine.make_leaf(
            {self.a, self.b, self.c},
            payload=iteration.RowSequence(
                [{self.a: 0, self.b: 2, self.c: 15}, {self.a: 2, self.b: 4, self.c: 20}]
            ),
            name="leaf_3",
        )
        # With no min or max columns, common_columns is just the intersection
        # of the columns of the operands.
        self.assertEqual(Join().applied_common_columns(self.leaf_1, leaf_3), {self.a, self.b})
        # Check that max_columns is enforced.
        self.assertEqual(
            Join(max_columns=frozenset({self.a})).applied_common_columns(self.leaf_1, leaf_3), {self.a}
        )
        # Check that min_columns is enforced.
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.c})).applied_common_columns(self.leaf_1, leaf_3)
        # Repeat last two checks with min_columns == max_columns.
        self.assertEqual(
            Join(min_columns=frozenset({self.a}), max_columns=frozenset({self.a})).applied_common_columns(
                self.leaf_1, leaf_3
            ),
            {self.a},
        )
        with self.assertRaises(ColumnError):
            Join(min_columns=frozenset({self.c}), max_columns=frozenset({self.c})).apply(self.leaf_1, leaf_3)

    def test_str(self) -> None:
        """Test str(Join), str(PartialJoin), and
        str(BinaryOperationRelation[Join]).
        """
        relation = self.leaf_1.join(self.leaf_2)
        self.assertEqual(str(relation), "leaf_1 ⋈ leaf_2")
        partial = Join().partial(self.leaf_1)
        self.assertEqual(str(partial), "⋈[leaf_1]")
        # Nested operations get parentheses, unless they're joins or leaves.
        leaf_3 = self.engine.make_leaf(
            {self.a, self.b},
            payload=iteration.RowSequence([{self.a: 3, self.b: 4}]),
            name="leaf_3",
        )
        self.assertEqual(str(relation.join(leaf_3)), "leaf_1 ⋈ leaf_2 ⋈ leaf_3")
        self.assertEqual(str(self.leaf_1.chain(leaf_3).join(self.leaf_2)), "(leaf_1 ∪ leaf_3) ⋈ leaf_2")
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()