Coverage for python/lsst/daf/relation/_engine.py: 47%
74 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-13 10:31 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Engine", "GenericConcreteEngine")
26import dataclasses
27import operator
28import uuid
29from abc import abstractmethod
30from collections.abc import Hashable, Sequence, Set
31from typing import TYPE_CHECKING, Any, Generic, TypeVar
33from ._columns import ColumnTag
34from ._exceptions import EngineError
36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true
37 from ._binary_operation import BinaryOperation
38 from ._relation import Relation
39 from ._unary_operation import UnaryOperation
42_F = TypeVar("_F")
class Engine(Hashable):
    """An abstract interface for the systems that hold relation data and know
    how to process relation trees.

    Notes
    -----
    A key part of any concrete engine's interface is not defined by the base
    class, because different engines can represent the content (or "payload")
    of a relation in very different ways.

    Engines can impose their own invariants on the structure of a relation
    tree, by implementing `conform`.  They can also maintain these invariants
    when new operations are added to the tree by implementing `append_unary`
    and `append_binary`, though any derived implementations of base-class
    methods that accept relation arguments should always conform them.
    """

    @abstractmethod
    def get_relation_name(self, prefix: str = "leaf") -> str:
        """Return a name suitable for a new relation in this engine.

        Parameters
        ----------
        prefix : `str`, optional
            Prefix to include in the returned name.

        Returns
        -------
        name : `str`
            Name for the relation; guaranteed to be unique over all of the
            relations in this engine.
        """
        raise NotImplementedError()

    def get_join_identity_payload(self) -> Any:
        """Return a `~Relation.payload` for a leaf relation that is the
        `join identity <Relation.is_join_identity>`.

        Returns
        -------
        payload
            The engine-specific content for this relation.

        Notes
        -----
        The base-class implementation returns `None`.
        """
        return None

    def get_doomed_payload(self, columns: Set[ColumnTag]) -> Any:
        """Return a `~Relation.payload` for a leaf relation that has no rows.

        Parameters
        ----------
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            The columns the relation should have.

        Returns
        -------
        payload
            The engine-specific content for this relation.

        Notes
        -----
        The base-class implementation ignores ``columns`` and returns `None`.
        """
        return None

    def conform(self, relation: Relation) -> Relation:
        """Ensure a relation tree satisfies this engine's invariants.

        This can include reordering operations (in a way consistent with their
        commutators) and/or inserting `MarkerRelation` nodes.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.

        Returns
        -------
        conformed : `Relation`
            Relation tree that satisfies this engine's invariants.

        Notes
        -----
        The default implementation returns the given relation.  Engines with a
        non-trivial `conform` implementation should always call it on any
        relations they are passed, as algorithms that process the relation tree
        are not guaranteed to maintain those invariants themselves.  It is
        recommended to use a custom `MarkerRelation` to indicate trees that
        satisfy invariants, allowing the corresponding `conform` implementation
        to short-circuit quickly.
        """
        return relation

    def materialize(
        self, target: Relation, name: str | None = None, name_prefix: str = "materialization_"
    ) -> Relation:
        """Mark that a target relation's payload should be cached.

        Parameters
        ----------
        target : `Relation`
            Relation to mark.
        name : `str`, optional
            Name to use for the cached payload within the engine.
        name_prefix : `str`, optional
            Prefix to pass to `get_relation_name`; ignored if ``name``
            is provided.

        Returns
        -------
        relation : `Relation`
            New relation that marks its upstream tree for caching, unless
            the materialization was simplified away.

        Notes
        -----
        The base class implementation calls `Materialization.simplify` to avoid
        materializations of leaf relations or other materializations.  Override
        implementations should generally do the same.

        When ``name`` is not provided, it is generated by calling
        `get_relation_name` on ``target.engine``.

        See Also
        --------
        Processor.materialize
        """
        # Imported here rather than at module scope, as in the other factory
        # methods of this class.
        from ._materialization import Materialization

        if Materialization.simplify(target):
            # Nothing to gain from materializing this target; hand it back.
            return target
        if name is None:
            name = target.engine.get_relation_name(name_prefix)
        return Materialization(target=target, name=name)

    def transfer(self, target: Relation, payload: Any | None = None) -> Relation:
        """Mark that a relation's payload should be transferred from some other
        engine to this one.

        Parameters
        ----------
        target : `Relation`
            Relation to transfer.  If ``target.engine == self``, this relation
            will be returned directly and no transfer will be performed.
            Back-to-back transfers from one engine to another and back again
            are also simplified away (via a call to `Transfer.simplify`).
            Sequences of transfers involving more than two engines are not
            simplified.
        payload, optional
            Destination-engine-specific content for the relation to attach to
            the transfer.  Most `Transfer` relations do not have a payload;
            their ability to do so is mostly to support the special relation
            trees returned by the `Processor` class.

        Returns
        -------
        relation : `Relation`
            New relation that marks its upstream tree to be transferred to a
            new engine.

        Raises
        ------
        EngineError
            Raised if ``payload`` is not `None` but the transfer is simplified
            away because the (possibly simplified) target is already in this
            engine.

        Notes
        -----
        The default implementation calls `conform` on the target relation using
        the target relation's engine (i.e. not ``self``).  All override
        implementations should do this as well.

        See Also
        --------
        Processor.transfer
        """
        from ._transfer import Transfer

        # A truthy result from ``Transfer.simplify`` replaces the target
        # before the same-engine check below.
        if simplified := Transfer.simplify(target, self):
            target = simplified
        if target.engine == self:
            # No Transfer node will be created, so there is nothing to attach
            # a payload to.
            if payload is not None:
                raise EngineError("Cannot attach payload to transfer that will be simplified away.")
            return target
        conformed_target = target.engine.conform(target)
        return Transfer(conformed_target, destination=self, payload=payload)

    def make_doomed_relation(
        self, columns: Set[ColumnTag], messages: Sequence[str], name: str = "0"
    ) -> Relation:
        """Construct a leaf relation with no rows and one or more messages
        explaining why.

        Parameters
        ----------
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            The columns in this relation.
        messages : `~collections.abc.Sequence` [ `str` ]
            One or more messages explaining why the relation has no rows.
        name : `str`, optional
            Name used to identify and reconstruct this relation.

        Returns
        -------
        relation : `Relation`
            Doomed relation.

        Notes
        -----
        This is simply a convenience method that delegates to
        `LeafRelation.make_doomed`.  Derived engines with a nontrivial
        `conform` should override this method to conform the return value.
        """
        from ._leaf_relation import LeafRelation

        return LeafRelation.make_doomed(self, columns, messages, name)

    def make_join_identity_relation(self, name: str = "I") -> Relation:
        """Construct a leaf relation with no columns and exactly one row.

        Parameters
        ----------
        name : `str`, optional
            Name used to identify and reconstruct this relation.

        Returns
        -------
        relation : `Relation`
            Relation with no columns and one row.

        Notes
        -----
        This delegates to `LeafRelation.make_join_identity`, passing ``self``
        as the engine.
        """
        from ._leaf_relation import LeafRelation

        return LeafRelation.make_join_identity(self, name)

    def append_unary(self, operation: UnaryOperation, target: Relation) -> Relation:
        """Hook for maintaining the engine's `conform` invariants through
        `UnaryOperation.apply`.

        This method should only be called by `UnaryOperation.apply` and the
        engine's own methods and helper classes.  External code should call
        `UnaryOperation.apply` or a `Relation` factory method instead.

        Parameters
        ----------
        operation : `UnaryOperation`
            Operation to apply; should already be filtered through
            `UnaryOperation._begin_apply`.
        target : `Relation`
            Relation to apply the operation to directly.

        Returns
        -------
        relation : `Relation`
            Relation that includes the given operation acting on ``target``,
            or a simplified equivalent.

        Notes
        -----
        Implementations should delegate back to `UnaryOperation._finish_apply`
        to actually create a `UnaryOperationRelation` and perform final
        simplification and checks.  This is all the default implementation
        does.
        """
        return operation._finish_apply(target)

    def append_binary(self, operation: BinaryOperation, lhs: Relation, rhs: Relation) -> Relation:
        """Hook for maintaining the engine's `conform` invariants through
        `BinaryOperation.apply`.

        This method should only be called by `BinaryOperation.apply` and the
        engine's own methods and helper classes.  External code should call
        `BinaryOperation.apply` or a `Relation` factory method instead.

        Parameters
        ----------
        operation : `BinaryOperation`
            Operation to apply; should already be filtered through
            `BinaryOperation._begin_apply`.
        lhs : `Relation`
            One relation to apply the operation to directly.
        rhs : `Relation`
            The other relation to apply the operation to directly.

        Returns
        -------
        relation : `Relation`
            Relation that includes the given operation acting on ``lhs`` and
            ``rhs``, or a simplified equivalent.

        Notes
        -----
        Implementations should delegate back to `BinaryOperation._finish_apply`
        to actually create a `BinaryOperationRelation` and perform final
        simplification and checks.  This is all the default implementation
        does.
        """
        return operation._finish_apply(lhs, rhs)

    def backtrack_unary(
        self, operation: UnaryOperation, tree: Relation, preferred: Engine
    ) -> tuple[Relation, bool, tuple[str, ...]]:
        """Attempt to insert a unary operation in another engine upstream of
        this one via operation commutators.

        Parameters
        ----------
        operation : `UnaryOperation`
            Unary operation to apply.
        tree : `Relation`
            Relation tree the operation logically acts on; any upstream
            insertion of the given operation should be equivalent to applying
            it to the root of this tree.  Caller guarantees that ``tree.engine
            == self``.
        preferred : `Engine`
            Engine in which the operation or its commuted equivalent should be
            performed.

        Returns
        -------
        new_tree : `Relation`
            Possibly-updated relation tree.
        done : `bool`
            If `True`, the operation has been fully inserted upstream in the
            preferred engine.  If `False`, either ``tree`` was returned
            unmodified or only a part of the operation (e.g. a projection whose
            columns are a superset of the given projection's) was inserted
            upstream.
        messages : `tuple` [ `str`, ... ]
            Messages explaining why backtracking insertion was unsuccessful or
            incomplete.  Should be sentences with no trailing ``.`` and no
            capitalization; they will be joined with semicolons.

        Notes
        -----
        The base-class implementation never modifies the tree: it returns
        ``tree`` unchanged, `False`, and a single message stating that this
        engine does not support backtracking insertion.
        """
        return tree, False, (f"engine {self} does not support backtracking insertion",)
@dataclasses.dataclass(repr=False, eq=False, kw_only=True)
class GenericConcreteEngine(Engine, Generic[_F]):
    """An implementation-focused base class for `Engine` objects.

    This class provides common functionality for the provided `iteration` and
    `sql` engines.  It may be used in external engine implementations as well.

    Notes
    -----
    Instances compare equal only to themselves and hash by identity; the
    dataclass-generated ``__eq__`` and ``__repr__`` are disabled.
    """

    name: str
    """Name of the engine; primarily used for display purposes (`str`).
    """

    functions: dict[str, _F] = dataclasses.field(default_factory=dict)
    """A mapping of engine-specific callables that are used to satisfy
    `ColumnFunction` and `PredicateFunction` name lookups.
    """

    relation_name_counter: int = 0
    """An integer counter used to generate relation names (`int`).
    """

    def __str__(self) -> str:
        return self.name

    def __hash__(self) -> int:
        # Identity-based hashing, consistent with the identity-based __eq__.
        return id(self)

    def __eq__(self, other: Any) -> bool:
        return self is other

    def get_relation_name(self, prefix: str = "leaf") -> str:
        """Return a name suitable for a new relation in this engine.

        Parameters
        ----------
        prefix : `str`, optional
            Prefix to include in the returned name.

        Returns
        -------
        name : `str`
            Name for the relation; guaranteed to be unique over all of the
            relations in this engine.

        Notes
        -----
        This implementation combines the given prefix with both the current
        `relation_name_counter` value and a random hexadecimal suffix, and
        then increments the counter.
        """
        name = f"{prefix}_{self.relation_name_counter:04d}_{uuid.uuid4().hex}"
        self.relation_name_counter += 1
        return name

    def get_function(self, name: str) -> _F | None:
        """Return the named column expression function.

        Parameters
        ----------
        name : `str`
            Name of the function, from `ColumnFunction.name` or
            `PredicateFunction.name`.

        Returns
        -------
        function
            Engine-specific callable, or `None` if no match was found.

        Notes
        -----
        This implementation first looks for a symbol with this name in the
        built-in `operator` module, to handle the common case (shared by both
        the `iteration` and `sql` engines) where these functions are
        appropriate for the engine due to operator overloading.  When this
        fails, the name is looked up in the `functions` attribute.  As a
        consequence, an entry in `functions` whose name matches an attribute
        of the `operator` module is never returned.
        """
        # The ``functions`` lookup is only the fallback: ``getattr`` returns
        # the ``operator`` attribute whenever one with this name exists.
        return getattr(operator, name, self.functions.get(name))