Coverage for python/lsst/daf/relation/_processor.py: 15%

77 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-09 03:04 -0800

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Processor",) 

25 

26from abc import ABC, abstractmethod 

27from typing import TYPE_CHECKING, Any 

28 

29from ._marker_relation import MarkerRelation 

30from ._materialization import Materialization 

31from ._operation_relations import BinaryOperationRelation, UnaryOperationRelation 

32from ._operations import Chain 

33from ._transfer import Transfer 

34 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true

36 from ._engine import Engine 

37 from ._relation import Relation 

38 

39 

40class Processor(ABC): 

41 """An inheritable framework for processing multi-engine relation trees. 

42 

43 Individual `Engine` classes have different definitions of what it means to 

44 process a relation tree, and no single engine can handle a tree with 

45 engines. This class provides a recursive algorithm that fills that role, 

46 with abstract method hooks for implementing `Transfer` and `Materialize` 

47 operations. 

48 

49 Notes 

50 ----- 

51 The `Processor` algorithm walks the tree recursively until it either finds: 

52 

53 - a `Relation` with a `Relation.payload` that is not `None`, which is 

54 returned as-is; 

55 

56 - a `Materialization` operation, for which a payload is computed via a call 

57 to the `materialize` hook, and then attached to both the original 

58 relation (modifying it in-place) and the returned one; 

59 

60 - a `Transfer` operation, for which a payload is computed via a call to 

61 the `transfer` hook, and then the attached to the returned relation only. 

62 

63 In addition, `Processor` never calls either hook on 

64 `trivial <Relation.is_trivial>` methods - 

65 `Engine.get_join_identity_payload` and `Engine.get_doomed_payload` are 

66 called instead. This can (for example) avoid executing asking a database 

67 to execute a SQL query when the relation tree knows in advance the result 

68 will have no real content. It also special-cases `Transfer` operations 

69 that are followed immediately by a `Materialization`, allowing both 

70 operations to be handled by a single call. 

71 """ 

72 

73 def process(self, relation: Relation) -> Relation: 

74 """Main entry point for processing a relation tree. 

75 

76 Parameters 

77 ---------- 

78 relation : `Relation` 

79 Root of the relation tree to process. On return, relations that 

80 hold a `Materialization` relation will have a new 

81 `~Relation.payload` attached, if they did not have one already. 

82 

83 Returns 

84 ------- 

85 processed : `Relation` 

86 A version of the relation tree in which any relation with a 

87 `Transfer` operation has a copy of the original `Transfer` that 

88 has a `~Relation.payload` attached. 

89 """ 

90 return self._process_recursive(relation, materialize_as=None)[0] 

91 

92 @abstractmethod 

93 def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> Any: 

94 """Hook for implementing transfers between engines. 

95 

96 This method should be called only by the `Processor` base class. 

97 

98 Parameters 

99 ---------- 

100 source : `Relation` 

101 Relation to be transferred. Any upstream `Transfer` operations in 

102 this tree are guaranteed to already have a `~Relation.payload` 

103 already attached (or some intervening relation does), so the 

104 relation's own engine should be capable of processing it on its 

105 own. 

106 destination : `Engine` 

107 Engine the relation is being transferred to. 

108 materialize_as : `str` or `None` 

109 If not `None`, the name of a `Materialization` operation that 

110 immediately follows the transfer being implemented, in which case 

111 the returned `~Relation.payload` should be appropriate for caching 

112 with the `Materialization`. 

113 

114 Returns 

115 ------- 

116 payload 

117 Payload for this relation in the ``destination`` engine. 

118 """ 

119 raise NotImplementedError() 

120 

121 @abstractmethod 

122 def materialize(self, target: Relation, name: str) -> Any: 

123 """Hook for implementing materialization operations. 

124 

125 This method should be called only by the `Processor` base class. 

126 

127 Parameters 

128 ---------- 

129 target : `Relation` 

130 Relation to be materialized. Any upstream `Transfer` operations in 

131 this tree are guaranteed to already have a `~Relation.payload` 

132 already attached (or some intervening relation does), so the 

133 relation's own engine should be capable of processing it on its 

134 own. 

135 name : `str` 

136 The name of the `Materialization` operation, to be used as needed 

137 in the engine-specific payload. 

138 

139 Returns 

140 ------- 

141 payload 

142 Payload for this relation that should be cached. 

143 """ 

144 raise NotImplementedError() 

145 

146 def _process_recursive(self, original: Relation, materialize_as: str | None) -> tuple[Relation, bool]: 

147 """Recursive implementation for `process`. 

148 

149 Parameters 

150 ---------- 

151 original : `Relation` 

152 Relation from the tree originally passed to `process`. 

153 materialize_as : `str` | `None` 

154 The name of the `Materialization` operation just downstream of this 

155 call, or `None` if the caller was not `_process_recursive` itself 

156 acting on a a `Materialization` operation. 

157 

158 Returns 

159 ------- 

160 processed : `Relation` 

161 Relation tree with `~Relation.payload` values attached to any 

162 `Transfer` operations. 

163 was_materialized : `bool` 

164 If `True`, `transfer` was called with ``materialize_as`` not 

165 `None`, and hence the caller (which must have been 

166 `_process_recursive` acting on a `Materialization` operation) does 

167 not need to call `materialize` to obtain a payload suitable for 

168 materialization. 

169 """ 

170 if original.payload is not None: 

171 return original, True 

172 result: Relation 

173 payload: Any = None 

174 match original: 

175 case Transfer(destination=destination, target=target): 

176 # If the result is a trivial relation, just make a new 

177 # payload directly in the destination engine. 

178 if original.is_join_identity: 

179 payload = destination.get_join_identity_payload() 

180 new_target = target 

181 elif original.max_rows == 0: 

182 payload = destination.get_doomed_payload(original.columns) 

183 new_target = target 

184 else: 

185 # Process recursively, ensuring upstream transfers 

186 # and materializations happen first. 

187 new_target, _ = self._process_recursive(target, materialize_as=None) 

188 # Actually execute the transfer. If materialize_as 

189 # is not None, this will also take care of an 

190 # immediately-downstream Materialization. 

191 payload = self.transfer(new_target, destination, materialize_as) 

192 # We need to attach this payload to the processed 

193 # relation we return, but we don't want to attach it to 

194 # the original, so we reapply the transfer operation to 

195 # new_target even if new_target is target. 

196 result = original.reapply(new_target, payload) 

197 return result, materialize_as is not None 

198 case Materialization(name=name, target=target): 

199 assert name is not None, "Guaranteed by Materialization.apply." 

200 # Process recursively, ensuring upstream transfers and 

201 # materializations happen first. Pass name as 

202 # materialize_as to tell an immediately-upstream 

203 # transfer to materialize directly. 

204 new_target, persisted = self._process_recursive(target, materialize_as=name) 

205 if new_target is not target: 

206 result = new_target.materialized(name=name) 

207 if result.payload is not None: 

208 # This operation has been simplified away 

209 # (perhaps it's now a materialization of a 

210 # leaf). 

211 original.attach_payload(result.payload) 

212 return result, True 

213 else: 

214 result = original 

215 if persisted: 

216 payload = new_target.payload 

217 elif original.is_join_identity: 

218 payload = target.engine.get_join_identity_payload() 

219 elif original.max_rows == 0: 

220 payload = target.engine.get_doomed_payload(original.columns) 

221 else: 

222 payload = self.materialize(new_target, name) 

223 # Attach the payload to the original relation, not just 

224 # the processed one, so it's used every time that the 

225 # original relation tree is processed. 

226 original.attach_payload(payload) 

227 if result is not original: 

228 result.attach_payload(payload) 

229 return result, True 

230 case MarkerRelation(target=target): 

231 new_target, persisted = self._process_recursive(target, materialize_as=materialize_as) 

232 return original.reapply(new_target), persisted 

233 case UnaryOperationRelation(operation=operation, target=target): 

234 new_target, _ = self._process_recursive(target, materialize_as=None) 

235 if new_target is not target: 

236 return operation.apply(new_target), False 

237 else: 

238 return original, False 

239 case BinaryOperationRelation(operation=operation, lhs=lhs, rhs=rhs): 

240 new_lhs, lhs_persisted = self._process_recursive(lhs, materialize_as=None) 

241 new_rhs, rhs_persisted = self._process_recursive(rhs, materialize_as=None) 

242 if isinstance(operation, Chain): 

243 # Simplify out relations with no rows from unions to save 

244 # engines from having to handle those do-nothing branches. 

245 # We don't do that earlier to the original tree usually 

246 # because this is useful diagnostic information. 

247 if new_lhs.max_rows == 0: 

248 return new_rhs, rhs_persisted 

249 if new_rhs.max_rows == 0: 

250 return new_lhs, lhs_persisted 

251 if new_lhs is not lhs or new_rhs is not rhs: 

252 return operation.apply(new_lhs, new_rhs), False 

253 return original, False 

254 raise AssertionError("Match should be exhaustive and all branches should return.")