Coverage for python/lsst/daf/relation/_processor.py: 15%
77 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:35 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:35 -0700
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Processor",)
26from abc import ABC, abstractmethod
27from typing import TYPE_CHECKING, Any
29from ._marker_relation import MarkerRelation
30from ._materialization import Materialization
31from ._operation_relations import BinaryOperationRelation, UnaryOperationRelation
32from ._operations import Chain
33from ._transfer import Transfer
35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true
36 from ._engine import Engine
37 from ._relation import Relation
40class Processor(ABC):
41 """An inheritable framework for processing multi-engine relation trees.
43 Individual `Engine` classes have different definitions of what it means to
44 process a relation tree, and no single engine can handle a tree with
45 engines. This class provides a recursive algorithm that fills that role,
46 with abstract method hooks for implementing `Transfer` and `Materialize`
47 operations.
49 Notes
50 -----
51 The `Processor` algorithm walks the tree recursively until it either finds:
53 - a `Relation` with a `Relation.payload` that is not `None`, which is
54 returned as-is;
56 - a `Materialization` operation, for which a payload is computed via a call
57 to the `materialize` hook, and then attached to both the original
58 relation (modifying it in-place) and the returned one;
60 - a `Transfer` operation, for which a payload is computed via a call to
61 the `transfer` hook, and then the attached to the returned relation only.
63 In addition, `Processor` never calls either hook on
64 `trivial <Relation.is_trivial>` methods -
65 `Engine.get_join_identity_payload` and `Engine.get_doomed_payload` are
66 called instead. This can (for example) avoid executing asking a database
67 to execute a SQL query when the relation tree knows in advance the result
68 will have no real content. It also special-cases `Transfer` operations
69 that are followed immediately by a `Materialization`, allowing both
70 operations to be handled by a single call.
71 """
73 def process(self, relation: Relation) -> Relation:
74 """Main entry point for processing a relation tree.
76 Parameters
77 ----------
78 relation : `Relation`
79 Root of the relation tree to process. On return, relations that
80 hold a `Materialization` relation will have a new
81 `~Relation.payload` attached, if they did not have one already.
83 Returns
84 -------
85 processed : `Relation`
86 A version of the relation tree in which any relation with a
87 `Transfer` operation has a copy of the original `Transfer` that
88 has a `~Relation.payload` attached.
89 """
90 return self._process_recursive(relation, materialize_as=None)[0]
92 @abstractmethod
93 def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> Any:
94 """Hook for implementing transfers between engines.
96 This method should be called only by the `Processor` base class.
98 Parameters
99 ----------
100 source : `Relation`
101 Relation to be transferred. Any upstream `Transfer` operations in
102 this tree are guaranteed to already have a `~Relation.payload`
103 already attached (or some intervening relation does), so the
104 relation's own engine should be capable of processing it on its
105 own.
106 destination : `Engine`
107 Engine the relation is being transferred to.
108 materialize_as : `str` or `None`
109 If not `None`, the name of a `Materialization` operation that
110 immediately follows the transfer being implemented, in which case
111 the returned `~Relation.payload` should be appropriate for caching
112 with the `Materialization`.
114 Returns
115 -------
116 payload
117 Payload for this relation in the ``destination`` engine.
118 """
119 raise NotImplementedError()
121 @abstractmethod
122 def materialize(self, target: Relation, name: str) -> Any:
123 """Hook for implementing materialization operations.
125 This method should be called only by the `Processor` base class.
127 Parameters
128 ----------
129 target : `Relation`
130 Relation to be materialized. Any upstream `Transfer` operations in
131 this tree are guaranteed to already have a `~Relation.payload`
132 already attached (or some intervening relation does), so the
133 relation's own engine should be capable of processing it on its
134 own.
135 name : `str`
136 The name of the `Materialization` operation, to be used as needed
137 in the engine-specific payload.
139 Returns
140 -------
141 payload
142 Payload for this relation that should be cached.
143 """
144 raise NotImplementedError()
146 def _process_recursive(self, original: Relation, materialize_as: str | None) -> tuple[Relation, bool]:
147 """Recursive implementation for `process`.
149 Parameters
150 ----------
151 original : `Relation`
152 Relation from the tree originally passed to `process`.
153 materialize_as : `str` | `None`
154 The name of the `Materialization` operation just downstream of this
155 call, or `None` if the caller was not `_process_recursive` itself
156 acting on a a `Materialization` operation.
158 Returns
159 -------
160 processed : `Relation`
161 Relation tree with `~Relation.payload` values attached to any
162 `Transfer` operations.
163 was_materialized : `bool`
164 If `True`, `transfer` was called with ``materialize_as`` not
165 `None`, and hence the caller (which must have been
166 `_process_recursive` acting on a `Materialization` operation) does
167 not need to call `materialize` to obtain a payload suitable for
168 materialization.
169 """
170 if original.payload is not None:
171 return original, True
172 result: Relation
173 payload: Any = None
174 match original:
175 case Transfer(destination=destination, target=target):
176 # If the result is a trivial relation, just make a new
177 # payload directly in the destination engine.
178 if original.is_join_identity:
179 payload = destination.get_join_identity_payload()
180 new_target = target
181 elif original.max_rows == 0:
182 payload = destination.get_doomed_payload(original.columns)
183 new_target = target
184 else:
185 # Process recursively, ensuring upstream transfers
186 # and materializations happen first.
187 new_target, _ = self._process_recursive(target, materialize_as=None)
188 # Actually execute the transfer. If materialize_as
189 # is not None, this will also take care of an
190 # immediately-downstream Materialization.
191 payload = self.transfer(new_target, destination, materialize_as)
192 # We need to attach this payload to the processed
193 # relation we return, but we don't want to attach it to
194 # the original, so we reapply the transfer operation to
195 # new_target even if new_target is target.
196 result = original.reapply(new_target, payload)
197 return result, materialize_as is not None
198 case Materialization(name=name, target=target):
199 assert name is not None, "Guaranteed by Materialization.apply."
200 # Process recursively, ensuring upstream transfers and
201 # materializations happen first. Pass name as
202 # materialize_as to tell an immediately-upstream
203 # transfer to materialize directly.
204 new_target, persisted = self._process_recursive(target, materialize_as=name)
205 if new_target is not target:
206 result = new_target.materialized(name=name)
207 if result.payload is not None:
208 # This operation has been simplified away
209 # (perhaps it's now a materialization of a
210 # leaf).
211 original.attach_payload(result.payload)
212 return result, True
213 else:
214 result = original
215 if persisted:
216 payload = new_target.payload
217 elif original.is_join_identity:
218 payload = target.engine.get_join_identity_payload()
219 elif original.max_rows == 0:
220 payload = target.engine.get_doomed_payload(original.columns)
221 else:
222 payload = self.materialize(new_target, name)
223 # Attach the payload to the original relation, not just
224 # the processed one, so it's used every time that the
225 # original relation tree is processed.
226 original.attach_payload(payload)
227 if result is not original:
228 result.attach_payload(payload)
229 return result, True
230 case MarkerRelation(target=target):
231 new_target, persisted = self._process_recursive(target, materialize_as=materialize_as)
232 return original.reapply(new_target), persisted
233 case UnaryOperationRelation(operation=operation, target=target):
234 new_target, _ = self._process_recursive(target, materialize_as=None)
235 if new_target is not target:
236 return operation.apply(new_target), False
237 else:
238 return original, False
239 case BinaryOperationRelation(operation=operation, lhs=lhs, rhs=rhs):
240 new_lhs, lhs_persisted = self._process_recursive(lhs, materialize_as=None)
241 new_rhs, rhs_persisted = self._process_recursive(rhs, materialize_as=None)
242 if isinstance(operation, Chain):
243 # Simplify out relations with no rows from unions to save
244 # engines from having to handle those do-nothing branches.
245 # We don't do that earlier to the original tree usually
246 # because this is useful diagnostic information.
247 if new_lhs.max_rows == 0:
248 return new_rhs, rhs_persisted
249 if new_rhs.max_rows == 0:
250 return new_lhs, lhs_persisted
251 if new_lhs is not lhs or new_rhs is not rhs:
252 return operation.apply(new_lhs, new_rhs), False
253 return original, False
254 raise AssertionError("Match should be exhaustive and all branches should return.")