Coverage for tests/test_processor.py: 14%

106 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-13 09:32 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import unittest 

25from collections.abc import Mapping, Set 

26from typing import Any 

27 

28from lsst.daf.relation import ( 

29 BinaryOperationRelation, 

30 ColumnExpression, 

31 ColumnTag, 

32 Engine, 

33 GenericConcreteEngine, 

34 LeafRelation, 

35 Materialization, 

36 Processor, 

37 Relation, 

38 SortTerm, 

39 Transfer, 

40 UnaryOperationRelation, 

41 tests, 

42) 

43 

44 

45class StringEngine(GenericConcreteEngine[str]): 

46 """A test Engine whose payloads are just the `str` of their relations.""" 

47 

48 def get_join_identity_payload(self) -> str: 

49 return "I" 

50 

51 def get_doomed_payload(self, columns: Set[ColumnTag]) -> str: 

52 return "0" 

53 

54 def make_leaf(self, name: str, *columns: ColumnTag, **kwargs: Any) -> Relation: 

55 return LeafRelation(self, frozenset(columns), name=name, payload=name, **kwargs) 

56 

57 

58class StringProcessor(Processor): 

59 """A test subclass of `Processor` that tracks the calls made to its hook 

60 methods and attaches the `str` of relations as payloads. 

61 """ 

62 

63 def __init__(self, test_case: ProcessorTestCase): 

64 self.test_case = test_case 

65 self.seen: list[str] = [] 

66 

67 def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> str: 

68 self.test_case.check_upstream_payloads(source) 

69 relation = source.transferred_to(destination) 

70 if materialize_as is not None: 

71 relation = relation.materialized(materialize_as) 

72 result = str(relation) 

73 self.seen.append(result) 

74 return result 

75 

76 def materialize(self, target: Relation, name: str) -> str: 

77 self.test_case.check_upstream_payloads(target) 

78 result = str(target.materialized(name)) 

79 self.seen.append(result) 

80 return result 

81 

82 

83class ProcessorTestCase(tests.RelationTestCase): 

84 """Tests for the Processor class.""" 

85 

86 def check_upstream_payloads( 

87 self, 

88 relation: Relation, 

89 materializations_only: bool = False, 

90 upstream_of_materialization: str | None = None, 

91 simplifications: Mapping[str, str] | None = None, 

92 ) -> None: 

93 """Check that a relation and its upstream tree have the payloads 

94 that should be attached by `StringProcessor`. 

95 

96 Parameters 

97 ---------- 

98 relation : `Relation` 

99 Relation to check. 

100 materializations_only : `bool`, optional 

101 If `True`, only expect leaf and materialization relations to have 

102 payloads, not transfers, as expected for a tree passed to (but not 

103 returned by) a `Processor`. 

104 upstream_of_materialization : `str` | None, optional 

105 If not `None`, this relation is just upstream of a materialization 

106 with this name and needs to adjust its expected `str` accordingly 

107 to include that materialization. 

108 simplifications : `~collections.abc.Mapping` [ `str`, `str` ] 

109 Mapping from the original `str` of a relation subtree to the 

110 simplified form that should have been used to compute the payload 

111 by `StringProcessor`. 

112 """ 

113 if simplifications is None: 

114 simplifications = {} 

115 if relation.is_join_identity: 

116 expected_string = "I" 

117 elif relation.max_rows == 0: 

118 expected_string = "0" 

119 else: 

120 if upstream_of_materialization is not None: 

121 expected_string = str(relation.materialized(upstream_of_materialization)) 

122 else: 

123 expected_string = str(relation) 

124 expected_string = simplifications.get(expected_string, expected_string) 

125 match relation: 

126 case LeafRelation(): 

127 self.assertIsNotNone(relation.payload) 

128 self.assertEqual(relation.payload, expected_string) 

129 case Materialization(): 

130 self.assertIsNotNone(relation.payload) 

131 self.assertEqual(relation.payload, expected_string) 

132 self.check_upstream_payloads( 

133 relation.target, 

134 materializations_only=materializations_only, 

135 upstream_of_materialization=relation.name, 

136 simplifications=simplifications, 

137 ) 

138 case Transfer(): 

139 if materializations_only: 

140 self.assertIsNone(relation.payload) 

141 else: 

142 self.assertIsNotNone(relation.payload) 

143 self.assertEqual(relation.payload, expected_string) 

144 self.check_upstream_payloads( 

145 relation.target, 

146 materializations_only=materializations_only, 

147 simplifications=simplifications, 

148 ) 

149 case UnaryOperationRelation(): 

150 self.check_upstream_payloads( 

151 relation.target, 

152 materializations_only=materializations_only, 

153 simplifications=simplifications, 

154 ) 

155 case BinaryOperationRelation(): 

156 self.check_upstream_payloads( 

157 relation.lhs, 

158 materializations_only=materializations_only, 

159 simplifications=simplifications, 

160 ) 

161 self.check_upstream_payloads( 

162 relation.rhs, 

163 materializations_only=materializations_only, 

164 simplifications=simplifications, 

165 ) 

166 

167 def test_processor(self) -> None: 

168 """Test the Processor class.""" 

169 # Cook up a three-engine relation tree with some pretty interesting 

170 # structure to it including some materializations; start with the 

171 # ingredients. 

172 engine1 = StringEngine(name="one") 

173 engine2 = StringEngine(name="two") 

174 engine3 = StringEngine(name="three") 

175 a = tests.ColumnTag("a") 

176 b = tests.ColumnTag("b") 

177 c = tests.ColumnTag("c") 

178 d = tests.ColumnTag("d") 

179 expression = ColumnExpression.reference(b).method("__neg__") 

180 predicate = ColumnExpression.reference(c).gt(ColumnExpression.literal(0)) 

181 terms = [SortTerm(ColumnExpression.reference(b))] 

182 leaf1 = engine1.make_leaf("leaf1", a, b) 

183 leaf2 = engine2.make_leaf("leaf1", a, c) 

184 leaf3 = engine3.make_leaf("leaf3", a, b, d) 

185 leaf4 = engine3.make_leaf("leaf4", a, b, d) 

186 # Build the tree itself while taking snapshots of its str(...) 

187 # everywhere there's a transfer and/or materialization. 

188 snapshots = [] 

189 full_tree = ( 

190 leaf2.with_rows_satisfying(predicate).transferred_to(engine1).materialized("materialization1") 

191 ) 

192 snapshots.append(str(full_tree)) 

193 full_tree = ( 

194 leaf1.with_calculated_column(d, expression) 

195 .join(full_tree) 

196 .with_only_columns({a, b, d}) 

197 .transferred_to(engine3) 

198 ) 

199 snapshots.append(str(full_tree)) 

200 full_tree = full_tree.chain(leaf3).materialized("materialization2") 

201 snapshots.append(str(full_tree)) 

202 

203 # Chain the full_tree to what's ultimately a relation with no rows. 

204 # The Processor will drop these operations without calling its 

205 # transfer() and materialize() hooks, so they won't appear in its 

206 # snapshots or the processed_tree it returns. 

207 trimmed_tree = full_tree 

208 full_tree = full_tree.chain( 

209 engine1.make_leaf("doomed_by_join", a, b) 

210 .join(engine1.make_doomed_relation({b, d}, ["badness"])) 

211 .transferred_to(engine3) 

212 ) 

213 # Add some more chains, one of which will simplify before a 

214 # materialization. 

215 full_subtree = leaf4.chain(engine3.make_doomed_relation({a, b, d}, ["badness again"])).materialized( 

216 "materialization3" 

217 ) 

218 full_tree = full_tree.chain(full_subtree) 

219 trimmed_tree = trimmed_tree.chain(leaf4) 

220 # Add a few more operations to both the full_ and trimmed_trees. 

221 full_tree = full_tree.without_duplicates().sorted(terms) 

222 trimmed_tree = trimmed_tree.without_duplicates().sorted(terms) 

223 # Construct and run the Processor, which itself checks some aspects 

224 # of the algorithm via calls to check_upstream_payloads. 

225 processor = StringProcessor(self) 

226 processed_tree = processor.process(full_tree) 

227 # Check that the snapshots taken by the processor match the ones we 

228 # took while creating the tree. 

229 self.assertEqual(processor.seen, snapshots) 

230 # Check that the processed tree has the same columns, row bounds, and 

231 # engine as the original. 

232 self.assertEqual(full_tree.columns, processed_tree.columns) 

233 self.assertEqual(full_tree.min_rows, processed_tree.min_rows) 

234 self.assertEqual(full_tree.max_rows, processed_tree.max_rows) 

235 # Check that the full tree now has payloads for materializations, but 

236 # not transfers. 

237 self.check_upstream_payloads( 

238 full_tree, materializations_only=True, simplifications={str(full_subtree): str(leaf4)} 

239 ) 

240 # Check that the returned tree now has payloads for materializations 

241 # and transfers. 

242 self.check_upstream_payloads(processed_tree, materializations_only=False) 

243 # Check that the returned tree has the same structure as the trimmed 

244 # tree. 

245 self.assert_relations_equal(trimmed_tree, processed_tree) 

246 # Process the original tree again, which should short-circuit at the 

247 # last materializations and not call its hooks at all. 

248 reprocessor = StringProcessor(self) 

249 reprocessed_tree = reprocessor.process(full_tree) 

250 self.assertEqual(reprocessor.seen, []) 

251 self.assertEqual(full_tree.columns, reprocessed_tree.columns) 

252 self.assertEqual(full_tree.min_rows, reprocessed_tree.min_rows) 

253 self.assertEqual(full_tree.max_rows, reprocessed_tree.max_rows) 

254 

255 

256if __name__ == "__main__":  # 256 ↛ 257: line 256 didn't jump to line 257, because the condition on line 256 was never true

257 unittest.main()