# This file is part of daf_relation.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import unittest
from collections.abc import Mapping, Set
from typing import Any

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    Engine,
    GenericConcreteEngine,
    LeafRelation,
    Materialization,
    Processor,
    Relation,
    SortTerm,
    Transfer,
    UnaryOperationRelation,
    tests,
)


class StringEngine(GenericConcreteEngine[str]):
    """A test Engine whose payloads are just the `str` of their relations."""

    def get_join_identity_payload(self) -> str:
        return "I"

    def get_doomed_payload(self, columns: Set[ColumnTag]) -> str:
        return "0"

    def make_leaf(self, name: str, *columns: ColumnTag, **kwargs: Any) -> Relation:
        return LeafRelation(self, frozenset(columns), name=name, payload=name, **kwargs)
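
# For example, StringEngine(name="one").make_leaf("leaf1", a, b) returns a
# LeafRelation whose payload is just the string "leaf1", so the payload checks
# below reduce to string comparisons.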


class StringProcessor(Processor):
    """A test subclass of `Processor` that tracks the calls made to its hook
    methods and attaches the `str` of relations as payloads.
    """

    def __init__(self, test_case: ProcessorTestCase):
        self.test_case = test_case
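        # Payload strings produced by the transfer() and materialize() hooks,
        # in the order the hooks were called.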
        self.seen: list[str] = []

    def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> str:
        self.test_case.check_upstream_payloads(source)
        relation = source.transferred_to(destination)
        if materialize_as is not None:
            relation = relation.materialized(materialize_as)
        result = str(relation)
        self.seen.append(result)
        return result

    def materialize(self, target: Relation, name: str) -> str:
        self.test_case.check_upstream_payloads(target)
        result = str(target.materialized(name))
        self.seen.append(result)
        return result


class ProcessorTestCase(tests.RelationTestCase):
    """Tests for the Processor class."""

    def check_upstream_payloads(
        self,
        relation: Relation,
        materializations_only: bool = False,
        upstream_of_materialization: str | None = None,
        simplifications: Mapping[str, str] | None = None,
    ) -> None:
93 """Check that a relation and its upstream tree have the payloads
94 that should be attached by `StringProcessor`.
96 Parameters
97 ----------
98 relation : `Relation`
99 Relation to check.
100 materializations_only : `bool`, optional
101 If `True`, only expect leaf and materialization relations to have
102 payloads, not transfers, as expected for a tree passed to (but not
103 returned by) a `Processor`.
104 upstream_of_materialization : `str` | None, optional
105 If not `None`, this relation is just upstream of a materialization
106 with this name and needs to adjust its expected `str` accordingly
107 to include that materialization.
108 simplifications : `~collections.abc.Mapping` [ `str`, `str` ]
109 Mappings from the original `str` of a relation subtree and the
110 simplified form that should have been used to compute the payload
111 by `StringProcessor`.
112 """
        if simplifications is None:
            simplifications = {}
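        # Compute the payload string that StringProcessor should have attached
        # to this relation; join-identity and doomed (max_rows == 0) relations
        # get the special "I" and "0" payloads from StringEngine.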
        if relation.is_join_identity:
            expected_string = "I"
        elif relation.max_rows == 0:
            expected_string = "0"
        else:
            if upstream_of_materialization is not None:
                expected_string = str(relation.materialized(upstream_of_materialization))
            else:
                expected_string = str(relation)
            expected_string = simplifications.get(expected_string, expected_string)
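        # Recurse through the tree according to relation type, checking
        # payloads wherever StringProcessor should have attached them.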
        match relation:
            case LeafRelation():
                self.assertIsNotNone(relation.payload)
                self.assertEqual(relation.payload, expected_string)
            case Materialization():
                self.assertIsNotNone(relation.payload)
                self.assertEqual(relation.payload, expected_string)
                self.check_upstream_payloads(
                    relation.target,
                    materializations_only=materializations_only,
                    upstream_of_materialization=relation.name,
                    simplifications=simplifications,
                )
            case Transfer():
                if materializations_only:
                    self.assertIsNone(relation.payload)
                else:
                    self.assertIsNotNone(relation.payload)
                    self.assertEqual(relation.payload, expected_string)
                self.check_upstream_payloads(
                    relation.target,
                    materializations_only=materializations_only,
                    simplifications=simplifications,
                )
            case UnaryOperationRelation():
                self.check_upstream_payloads(
                    relation.target,
                    materializations_only=materializations_only,
                    simplifications=simplifications,
                )
            case BinaryOperationRelation():
                self.check_upstream_payloads(
                    relation.lhs,
                    materializations_only=materializations_only,
                    simplifications=simplifications,
                )
                self.check_upstream_payloads(
                    relation.rhs,
                    materializations_only=materializations_only,
                    simplifications=simplifications,
                )

    def test_processor(self) -> None:
        """Test the Processor class."""
        # Cook up a three-engine relation tree with some pretty interesting
        # structure to it, including some materializations; start with the
        # ingredients.
        engine1 = StringEngine(name="one")
        engine2 = StringEngine(name="two")
        engine3 = StringEngine(name="three")
        a = tests.ColumnTag("a")
        b = tests.ColumnTag("b")
        c = tests.ColumnTag("c")
        d = tests.ColumnTag("d")
        expression = ColumnExpression.reference(b).method("__neg__")
        predicate = ColumnExpression.reference(c).gt(ColumnExpression.literal(0))
        terms = [SortTerm(ColumnExpression.reference(b))]
        leaf1 = engine1.make_leaf("leaf1", a, b)
        leaf2 = engine2.make_leaf("leaf2", a, c)
        leaf3 = engine3.make_leaf("leaf3", a, b, d)
        leaf4 = engine3.make_leaf("leaf4", a, b, d)
        # Build the tree itself while taking snapshots of its str(...)
        # everywhere there's a transfer and/or materialization.
        snapshots = []
        full_tree = (
            leaf2.with_rows_satisfying(predicate).transferred_to(engine1).materialized("materialization1")
        )
        snapshots.append(str(full_tree))
        full_tree = (
            leaf1.with_calculated_column(d, expression)
            .join(full_tree)
            .with_only_columns({a, b, d})
            .transferred_to(engine3)
        )
        snapshots.append(str(full_tree))
        full_tree = full_tree.chain(leaf3).materialized("materialization2")
        snapshots.append(str(full_tree))
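        # These three snapshots should match the payload strings that the
        # processor's transfer/materialize hooks record, in the same order.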

        # Chain the full_tree to what's ultimately a relation with no rows.
        # The Processor will drop these operations without calling its
        # transfer() and materialize() hooks, so they won't appear in its
        # snapshots or the processed_tree it returns.
        trimmed_tree = full_tree
        full_tree = full_tree.chain(
            engine1.make_leaf("doomed_by_join", a, b)
            .join(engine1.make_doomed_relation({b, d}, ["badness"]))
            .transferred_to(engine3)
        )
        # Add some more chains, one of which will simplify before a
        # materialization.
        full_subtree = leaf4.chain(engine3.make_doomed_relation({a, b, d}, ["badness again"])).materialized(
            "materialization3"
        )
        full_tree = full_tree.chain(full_subtree)
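        # The trimmed tree expects the Processor to drop the doomed chain
        # member, leaving just leaf4 chained in.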
        trimmed_tree = trimmed_tree.chain(leaf4)
        # Add a few more operations to both the full_ and trimmed_trees.
        full_tree = full_tree.without_duplicates().sorted(terms)
        trimmed_tree = trimmed_tree.without_duplicates().sorted(terms)
        # Construct and run the Processor, which itself checks some aspects
        # of the algorithm via calls to check_upstream_payloads.
        processor = StringProcessor(self)
        processed_tree = processor.process(full_tree)
        # Check that the snapshots taken by the processor match the ones we
        # took while creating the tree.
        self.assertEqual(processor.seen, snapshots)
        # Check that the processed tree has the same columns and row bounds
        # as the original.
        self.assertEqual(full_tree.columns, processed_tree.columns)
        self.assertEqual(full_tree.min_rows, processed_tree.min_rows)
        self.assertEqual(full_tree.max_rows, processed_tree.max_rows)
        # Check that the full tree now has payloads for materializations, but
        # not transfers.
        self.check_upstream_payloads(
            full_tree, materializations_only=True, simplifications={str(full_subtree): str(leaf4)}
        )
        # Check that the returned tree now has payloads for materializations
        # and transfers.
        self.check_upstream_payloads(processed_tree, materializations_only=False)
        # Check that the returned tree has the same structure as the trimmed
        # tree.
        self.assert_relations_equal(trimmed_tree, processed_tree)
        # Process the original tree again, which should short-circuit at the
        # last materializations and not call its hooks at all.
        reprocessor = StringProcessor(self)
        reprocessed_tree = reprocessor.process(full_tree)
        self.assertEqual(reprocessor.seen, [])
        self.assertEqual(full_tree.columns, reprocessed_tree.columns)
        self.assertEqual(full_tree.min_rows, reprocessed_tree.min_rows)
        self.assertEqual(full_tree.max_rows, reprocessed_tree.max_rows)


if __name__ == "__main__":
    unittest.main()