Coverage for tests/test_deduplication.py: 18%
55 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-13 09:32 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-13 09:32 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import unittest
26from lsst.daf.relation import (
27 ColumnExpression,
28 Deduplication,
29 EngineError,
30 SortTerm,
31 UnaryOperationRelation,
32 iteration,
33 tests,
34)
class DeduplicationTestCase(tests.RelationTestCase):
    """Tests for the Deduplication operation and relations based on it.

    Every test starts from the single-column leaf relation created in
    `setUp`, whose payload contains one repeated row.
    """

    def setUp(self) -> None:
        """Create the column tags, engine, and leaf relation shared by all
        tests.
        """
        self.a = tests.ColumnTag("a")
        self.b = tests.ColumnTag("b", is_key=False)
        self.engine = iteration.Engine(name="preferred")
        # The row {a: 1} appears twice so there is a duplicate to remove.
        self.leaf = self.engine.make_leaf(
            {self.a}, payload=iteration.RowSequence([{self.a: 1}, {self.a: 0}, {self.a: 1}]), name="leaf"
        )

    def test_attributes(self) -> None:
        """Check that all UnaryOperation and Relation attributes have the
        expected values.
        """
        relation = self.leaf.without_duplicates()
        # NOTE(review): a bare assert (rather than assertIsInstance) is
        # presumably used so static type checkers narrow the type - confirm.
        assert isinstance(relation, UnaryOperationRelation)
        self.assertEqual(relation.columns, {self.a})
        self.assertEqual(relation.engine, self.engine)
        self.assertEqual(relation.min_rows, 1)
        self.assertEqual(relation.max_rows, self.leaf.max_rows)
        operation = relation.operation
        assert isinstance(operation, Deduplication)
        self.assertEqual(operation.columns_required, set())
        self.assertTrue(operation.is_empty_invariant)
        self.assertFalse(operation.is_count_invariant)
        self.assertFalse(operation.is_order_dependent)
        self.assertFalse(operation.is_count_dependent)

    def test_backtracking_apply(self) -> None:
        """Test apply logic that involves reordering operations in the existing
        tree to perform the new operation in a preferred engine.
        """
        new_engine = iteration.Engine(name="downstream")
        expression = ColumnExpression.reference(self.a)
        predicate = expression.lt(ColumnExpression.literal(20))
        # Apply a bunch of operations in a new engine that a Deduplication
        # should commute with.
        target = (
            self.leaf.transferred_to(new_engine)
            .with_calculated_column(self.b, expression)
            .with_rows_satisfying(predicate)
            .sorted([SortTerm(ColumnExpression.reference(self.a))])
        )
        # Apply a new Deduplication with backtracking and see that it appears
        # before the transfer to the new engine, with the operations that
        # were applied after the transfer left in place downstream of it.
        relation = target.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        self.assert_relations_equal(
            relation,
            (
                self.leaf.without_duplicates()
                .transferred_to(new_engine)
                .with_calculated_column(self.b, expression)
                .with_rows_satisfying(predicate)
                .sorted([SortTerm(ColumnExpression.reference(self.a))])
            ),
        )

    def test_no_backtracking(self) -> None:
        """Test apply logic that handles preferred engines without reordering
        operations in the existing tree.

        Each relation tree is built *outside* the ``assertRaises`` context so
        that only the ``without_duplicates`` call itself can satisfy the
        expectation; an `EngineError` raised while constructing the tree
        would otherwise make the check pass for the wrong reason.
        """
        new_engine = iteration.Engine(name="downstream")
        # Construct a relation tree we can't reorder when inserting a
        # Deduplication, because there is a locked Materialization in the way.
        target = self.leaf.transferred_to(new_engine).materialized("lock")
        # Preferred engine is ignored if we can't backtrack and don't enable
        # anything else.
        self.assert_relations_equal(
            target.without_duplicates(preferred_engine=self.engine),
            target.without_duplicates(),
        )
        # We can force this to be an error.
        with self.assertRaises(EngineError):
            target.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # We can also automatically transfer (back) to the preferred engine.
        self.assert_relations_equal(
            target.without_duplicates(preferred_engine=self.engine, transfer=True),
            target.transferred_to(self.engine).without_duplicates(),
        )
        # Now try a few other ways of making backtrack fail.
        # Deduplication does not commute with Projection.
        projected = (
            self.engine.make_leaf(
                {self.a, self.b},
                payload=iteration.RowSequence([{self.a: 0, self.b: 0}, {self.a: 0, self.b: 1}]),
                name="leaf",
            )
            .transferred_to(new_engine)
            .with_only_columns({self.a})
        )
        with self.assertRaises(EngineError):
            projected.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication does not commute with Slice.
        sliced = self.leaf.transferred_to(new_engine)[:1]
        with self.assertRaises(EngineError):
            sliced.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        # Deduplication cannot be inserted past Chains or Joins
        # (at least not without more information than we have, like whether
        # Chain branches are disjoint or leaf relations start out with unique
        # rows).
        chained = self.leaf.transferred_to(new_engine).chain(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="chain_leaf",
            )
        )
        with self.assertRaises(EngineError):
            chained.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)
        joined = self.leaf.transferred_to(new_engine).join(
            new_engine.make_leaf(
                {self.a},
                payload=iteration.RowSequence([{self.a: 0}]),
                name="join_leaf",
            )
        )
        with self.assertRaises(EngineError):
            joined.without_duplicates(preferred_engine=self.engine, require_preferred_engine=True)

    def test_iteration(self) -> None:
        """Test Deduplication execution in the iteration engine."""
        relation = self.leaf.without_duplicates()
        # The expected rows are the distinct leaf rows, in the order of their
        # first appearance in the leaf payload.
        self.assertEqual(
            list(self.engine.execute(relation)),
            [{self.a: 1}, {self.a: 0}],
        )

    def test_str(self) -> None:
        """Test str(UnaryOperationRelation[Deduplication]), i.e. the string
        form of a relation whose outermost operation is a Deduplication.
        """
        relation = self.leaf.without_duplicates()
        self.assertEqual(str(relation), "deduplicate(leaf)")
# Run the tests in this module when it is executed directly as a script;
# importing the module does not start the test runner.
if __name__ == "__main__":
    unittest.main()