Coverage for tests/test_deduplication.py: 15%
61 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 10:42 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 10:42 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import unittest
26from lsst.daf.relation import (
27 ColumnExpression,
28 Deduplication,
29 EngineError,
30 SortTerm,
31 UnaryOperationRelation,
32 iteration,
33 tests,
34)
class DeduplicationTestCase(tests.RelationTestCase):
    """Tests for the Deduplication operation and relations based on it.

    Every test starts from the single-column leaf relation built in `setUp`,
    whose payload deliberately contains a repeated row.
    """

    def setUp(self) -> None:
        """Create the column tags, engine, and leaf relation shared by tests.

        The leaf lives in an iteration engine named ``"preferred"``, has the
        single column ``a``, and holds three rows of which two are identical.
        """
        self.a = tests.ColumnTag("a")
        self.b = tests.ColumnTag("b", is_key=False)
        self.engine = iteration.Engine(name="preferred")
        self.leaf = self.engine.make_leaf(
            {self.a}, payload=iteration.RowSequence([{self.a: 1}, {self.a: 0}, {self.a: 1}]), name="leaf"
        )

    def test_attributes(self) -> None:
        """Check that all UnaryOperation and Relation attributes have the
        expected values.
        """
        relation = self.leaf.without_duplicates()
        # Plain ``assert isinstance`` (rather than ``assertIsInstance``) so
        # static type checkers narrow the type for the attribute accesses
        # below.
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, {self.a})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 1)
        self.assertEqual(relation.max_rows, self.leaf.max_rows)
        operation = relation.operation
        assert isinstance(operation, Deduplication)
        self.assertEqual(operation.columns_required, set())
        self.assertTrue(operation.is_empty_invariant)
        self.assertFalse(operation.is_count_invariant)
        self.assertFalse(operation.is_order_dependent)
        self.assertFalse(operation.is_count_dependent)

    def test_min_max_rows(self) -> None:
        """Test min and max rows for edge-case deduplications.

        Covers a non-empty relation projected down to zero columns (expected
        to report exactly one row after deduplication) and a relation with no
        rows at all (expected to report exactly zero rows, with or without
        columns).
        """
        # Non-empty source with no columns.
        self.assertEqual(self.leaf.with_only_columns(set()).without_duplicates().min_rows, 1)
        self.assertEqual(self.leaf.with_only_columns(set()).without_duplicates().max_rows, 1)
        # Empty source, first with its column and then with no columns.
        leaf0 = self.engine.make_leaf({self.a}, payload=iteration.RowSequence([]), name="leaf")
        self.assertEqual(leaf0.without_duplicates().min_rows, 0)
        self.assertEqual(leaf0.without_duplicates().max_rows, 0)
        self.assertEqual(leaf0.with_only_columns([]).without_duplicates().min_rows, 0)
        self.assertEqual(leaf0.with_only_columns([]).without_duplicates().max_rows, 0)

    def test_backtracking_apply(self) -> None:
        """Test apply logic that involves reordering operations in the existing
        tree to perform the new operation in a preferred engine.
        """
        new_engine = iteration.Engine(name="downstream")
        expression = ColumnExpression.reference(self.a)
        predicate = expression.lt(ColumnExpression.literal(20))
        # Apply a bunch of operations in a new engine that a Deduplication
        # should commute with.
        target = (
            self.leaf.transferred_to(new_engine)
            .with_calculated_column(self.b, expression)
            .with_rows_satisfying(predicate)
            .sorted([SortTerm(ColumnExpression.reference(self.a))])
        )
        # Apply a new Deduplication with backtracking and see that it appears
        # before the transfer to the new engine, with the downstream
        # operations (calculated column, row filter, sort) left unchanged.
        relation = target.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        self.assert_relations_equal(
            relation,
            (
                self.leaf.without_duplicates()
                .transferred_to(new_engine)
                .with_calculated_column(self.b, expression)
                .with_rows_satisfying(predicate)
                .sorted([SortTerm(ColumnExpression.reference(self.a))])
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test apply logic that handles preferred engines without reordering
        operations in the existing tree.

        Each relation is built *outside* its ``assertRaises`` block so that
        only the ``without_duplicates`` call is allowed to raise; an
        `EngineError` during construction would otherwise be mistaken for the
        expected failure.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a
        # Deduplication, because there is a locked Materialization in the way.
        target = self.leaf.transferred_to(new_engine).materialized("lock")
        # Preferred engine is ignored if we can't backtrack and don't enable
        # anything else.
        self.assert_relations_equal(
            target.without_duplicates(preferred_engine=self.engine),
            target.without_duplicates(),
        )
        # We can force this to be an error.
        with self.assertRaises(EngineError):
            target.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # We can also automatically transfer (back) to the preferred engine.
        self.assert_relations_equal(
            target.without_duplicates(preferred_engine=self.engine, transfer=True),
            target.transferred_to(self.engine).without_duplicates(),
        )
        # Now try a few other ways of making backtrack fail.
        # Deduplication does not commute with Projection.
        projected = (
            self.engine.make_leaf(
                {self.a, self.b},
                payload=iteration.RowSequence([{self.a: 0, self.b: 0}, {self.a: 0, self.b: 1}]),
                name="leaf",
            )
            .transferred_to(new_engine)
            .with_only_columns({self.a})
        )
        with self.assertRaises(EngineError):
            projected.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication does not commute with Slice.
        sliced = self.leaf.transferred_to(new_engine)[:1]
        with self.assertRaises(EngineError):
            sliced.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication cannot be inserted past Chains or Joins
        # (at least not without more information than we have, like whether
        # Chain branches are disjoint or leaf relations start out with unique
        # rows).
        chained = self.leaf.transferred_to(new_engine).chain(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="chain_leaf",
            )
        )
        with self.assertRaises(EngineError):
            chained.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        joined = self.leaf.transferred_to(new_engine).join(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="join_leaf",
            )
        )
        with self.assertRaises(EngineError):
            joined.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)

    def test_iteration(self) -> None:
        """Test Deduplication execution in the iteration engine.

        The leaf rows are ``1, 0, 1``; the expected output keeps one row per
        distinct value, in order of first appearance.
        """
        relation = self.leaf.without_duplicates()
        self.assertEqual(
            list(self.engine.execute(relation)),
            [{self.a: 1}, {self.a: 0}],
        )

    def test_str(self) -> None:
        """Test str(Deduplication) and
        str(UnaryOperationRelation[Deduplication]).
        """
        relation = self.leaf.without_duplicates()
        self.assertEqual(str(relation), "deduplicate(leaf)")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()