Coverage for python/lsst/daf/relation/_unary_operation.py: 51%
146 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-13 10:31 +0000
1# This file is part of daf_relation.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("UnaryOperation", "RowFilter", "Reordering", "Identity", "UnaryCommutator")
26import dataclasses
27from abc import ABC, abstractmethod
28from collections.abc import Set
29from typing import TYPE_CHECKING, Literal, final
31from ._columns import ColumnTag
32from ._exceptions import EngineError
33from ._relation import Relation
35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true
36 from ._engine import Engine
37 from ._operation_relations import UnaryOperationRelation
class UnaryOperation(ABC):
    """An abstract base class for operations that act on a single relation.

    Notes
    -----
    A `UnaryOperation` represents the operation itself; the combination of an
    operation and the "target" relation it acts on to form a new relation is
    represented by the `UnaryOperationRelation` class.  That combination
    should always be created via a call to the `apply` method (or something
    that calls it, like the convenience methods on the `Relation` class).  In
    some cases, applying a `UnaryOperation` doesn't return something involving
    the original operation, because of some combination of
    defaulted-parameter population and simplification, and there are even some
    `UnaryOperation` classes that should never actually appear in a
    `UnaryOperationRelation`.

    `UnaryOperation` cannot be subclassed directly by external code, but it
    has two more restricted subclasses that can be: `RowFilter` and
    `Reordering`.

    All concrete `UnaryOperation` types are frozen, equality-comparable
    `dataclasses`.  They also provide a very concise `str` representation (in
    addition to the dataclass-provided `repr`) suitable for summarizing an
    entire relation tree.

    See Also
    --------
    :ref:`lsst.daf.relation-overview-operations`
    """

    def __init_subclass__(cls) -> None:
        # Only *direct* subclasses are restricted, and the restriction is by
        # class name; anything that derives from an intermediate class (e.g.
        # a RowFilter or Reordering subclass) is always accepted.
        assert (
            cls.__name__
            in {
                "Calculation",
                "Deduplication",
                "Identity",
                "PartialJoin",
                "Projection",
                "RowFilter",
                "Reordering",
            }
            or cls.__base__ is not UnaryOperation
        ), (
            "UnaryOperation inheritance is closed to predefined types in daf_relation, "
            "except for subclasses of RowFilter and Reordering."
        )
        # Keep cooperative subclass hooks working for mixins.
        super().__init_subclass__()

    @property
    def columns_required(self) -> Set[ColumnTag]:
        """The columns the target relation must have in order for this
        operation to be applied to it (`~collections.abc.Set` [ `ColumnTag` ]
        ).
        """
        return frozenset()

    @abstractmethod
    def __str__(self) -> str:
        raise NotImplementedError()

    @property
    @abstractmethod
    def is_empty_invariant(self) -> bool:
        """Whether this operation is guaranteed not to remove all rows from a
        non-empty target relation (`bool`).

        When `True`, the result of the operation can only be empty if its
        target is empty.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def is_count_invariant(self) -> bool:
        """Whether this operation is guaranteed to leave the number of rows
        in its target relation unchanged (`bool`).

        The number of rows here includes duplicates - removing duplicates is
        not considered a count-invariant operation.
        """
        raise NotImplementedError()

    @property
    def is_order_dependent(self) -> bool:
        """Whether this operation depends on the order of the rows in its
        target relation (`bool`).
        """
        return False

    @property
    def is_count_dependent(self) -> bool:
        """Whether this operation depends on the number of rows in its target
        relation (`bool`).
        """
        return False

    def is_supported_by(self, engine: Engine) -> bool:
        """Whether this operation is supported by the given engine (`bool`)."""
        return True

    @final
    def apply(
        self,
        target: Relation,
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        """Create a new relation that represents the action of this operation
        on an existing relation.

        Parameters
        ----------
        target : `Relation`
            Relation the operation will act on.
        preferred_engine : `Engine`, optional
            Engine that the operation would ideally be performed in.  If this
            is not equal to ``target.engine``, the ``backtrack``,
            ``transfer``, and ``require_preferred_engine`` arguments control
            the behavior.  Some operations may supply their own preferred
            engine default, such as the "fixed" operand's own engine in a
            `PartialJoin`.
        backtrack : `bool`, optional
            If `True` (default) and the current engine is not the preferred
            engine, attempt to insert this operation before a transfer
            upstream of the current relation, as long as this can be done
            without breaking up any locked relations or changing the
            resulting relation content.
        transfer : `bool`, optional
            If `True` (`False` is default) and the current engine is not the
            preferred engine, insert a new `Transfer` to the preferred engine
            before this operation.  If ``backtrack`` is also true, the
            transfer is added only if the backtrack attempt fails.
        require_preferred_engine : `bool`, optional
            If `True` (`False` is default) and the current engine is not the
            preferred engine, raise `EngineError`.  If ``backtrack`` is also
            true, the exception is only raised if the backtrack attempt
            fails.  Ignored if ``transfer`` is true.

        Returns
        -------
        new_relation : `Relation`
            Relation that includes this operation.  This may be ``target`` if
            the operation is a no-op, and it may not be a
            `UnaryOperationRelation` holding this operation (or even a similar
            one) if the operation was inserted earlier in the tree via
            commutation relations or if simplification occurred.

        Raises
        ------
        ColumnError
            Raised if the operation could not be applied due to problems with
            the target relation's columns.
        EngineError
            Raised if the operation could not be applied due to problems with
            the target relation's engine.

        Notes
        -----
        Adding operations to relation trees is a potentially complex process,
        in order to give both the operation type and the engine the
        opportunity to customize the result and enforce their own invariants.
        This `~typing.final` method provides the bulk of the high-level
        implementation, and is called by the `Relation` class's convenience
        methods with essentially no additional logic.  The overall sequence
        is as follows:

        - `apply` starts by delegating to `_begin_apply`, which
          allows operation classes to replace the operation object itself,
          perform initial checks, and set the preferred engine.
        - `apply` then performs the ``preferred_engine`` logic indicated by
          the ``backtrack``, ``transfer``, and ``require_preferred_engine``
          options, delegating backtracking to `Engine.backtrack_unary`.
        - `Engine.backtrack_unary` will typically call back to `commute` to
          determine how and whether to move the new operation upstream of
          existing ones.
        - If backtracking is not used or is not fully successful, `apply`
          then delegates to `Engine.append_unary` to add the operation to the
          root of the relation tree.
        - The `Engine` methods are expected to delegate back to
          `_finish_apply` when they have identified the location in the tree
          where the new operation should be inserted.
        - `_finish_apply` is responsible for actually constructing the
          `UnaryOperationRelation` when appropriate.  The default
          implementation of `_finish_apply` also calls `simplify` to see if
          it is possible to merge the new operation with those immediately
          upstream of it or elide it entirely.
        """
        operation, preferred_engine = self._begin_apply(target, preferred_engine)
        done = False
        result = target
        if preferred_engine != target.engine:
            if backtrack:
                result, done, messages = target.engine.backtrack_unary(operation, target, preferred_engine)
            else:
                messages = ("backtracking insertion not requested by caller",)
            if not done:
                # Backtracking was skipped or incomplete; fall back to an
                # explicit transfer, or fail if the caller insisted on the
                # preferred engine.  Otherwise the operation is simply
                # appended in whatever engine ``result`` is already in.
                if transfer:
                    result = result.transferred_to(preferred_engine)
                elif require_preferred_engine:
                    raise EngineError(
                        f"No way to apply {operation} to {target} "
                        f"with required engine '{preferred_engine}': {'; '.join(messages)}."
                    )
        if not done:
            result = result.engine.append_unary(operation, result)
        return result

    def _begin_apply(
        self, target: Relation, preferred_engine: Engine | None
    ) -> tuple[UnaryOperation, Engine]:
        """A customization hook for the beginning of operation application.

        Parameters
        ----------
        target : `Relation`
            Relation the operation should act on, at least conceptually.
            Later logic may actually apply the operation upstream of this
            relation, but only when the result of doing so would be
            equivalent to applying it here.
        preferred_engine : `Engine` or `None`
            Preferred engine passed to `apply`.

        Returns
        -------
        operation : `UnaryOperation`
            The operation to actually apply.  The default implementation
            returns ``self``.
        preferred_engine : `Engine`
            The engine to actually prefer.  The default implementation
            returns the given ``preferred_engine`` if it is not `None`, and
            ``target.engine`` if it is `None`.

        Notes
        -----
        This method provides an opportunity for operations to establish any
        invariants that must be satisfied only when the operation is part of
        a relation.  Implementations can also return an `Identity` instance
        when they can determine that the operation will do nothing when
        applied to the given target.
        """
        if preferred_engine is None:
            preferred_engine = target.engine
        return self, preferred_engine

    def _finish_apply(self, target: Relation) -> Relation:
        """A customization hook for the end of operation application.

        Parameters
        ----------
        target : `Relation`
            Relation the operation will act upon directly.

        Returns
        -------
        applied : `Relation`
            Result of applying this operation to the given target.  Usually -
            but not always - a `UnaryOperationRelation` that holds ``self``
            and ``target``.

        Raises
        ------
        EngineError
            Raised if ``target.engine`` does not support this operation.

        Notes
        -----
        This method provides an opportunity for operations to perform final
        simplification at the point of insertion (which the default
        implementation does, via calls to `simplify`) and change the kind of
        relation produced (the default implementation constructs a
        `UnaryOperationRelation`).
        """
        # Imported here rather than at module scope; the module-level import
        # of this name is guarded by TYPE_CHECKING.
        from ._operation_relations import UnaryOperationRelation

        if isinstance(target, UnaryOperationRelation):
            simplified = self.simplify(target.operation)
            if simplified is not None:
                if simplified is target.operation:
                    # The existing upstream operation already covers the
                    # combination, so this operation adds nothing.
                    return target
                # Replace the upstream operation with the merged one, giving
                # it a chance to simplify further against its own upstream.
                return simplified._finish_apply(target.target)

        if not self.is_supported_by(target.engine):
            raise EngineError(f"Operation {self} is not supported by engine {target.engine}.")

        return UnaryOperationRelation(
            operation=self,
            target=target,
            columns=self.applied_columns(target),
        )

    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        """Return the columns of the relation that results from applying this
        operation to the given target.

        Parameters
        ----------
        target : `Relation`
            Relation the operation will act on.

        Returns
        -------
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Columns the new relation would have.  The default implementation
            returns ``target.columns``.
        """
        return target.columns

    @abstractmethod
    def applied_min_rows(self, target: Relation) -> int:
        """Return the minimum number of rows of the relation that results from
        applying this operation to the given target.

        Parameters
        ----------
        target : `Relation`
            Relation the operation will act on.

        Returns
        -------
        min_rows : `int`
            Minimum number of rows the new relation would have.
        """
        raise NotImplementedError()

    def applied_max_rows(self, target: Relation) -> int | None:
        """Return the maximum number of rows of the relation that results from
        applying this operation to the given target.

        Parameters
        ----------
        target : `Relation`
            Relation the operation will act on.

        Returns
        -------
        max_rows : `int` or `None`
            Maximum number of rows the new relation would have.  The default
            implementation returns ``target.max_rows``.
        """
        return target.max_rows

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        """Describe whether and how this operation can be moved upstream of an
        existing one without changing the content of the resulting relation.

        Parameters
        ----------
        current : `UnaryOperationRelation`
            A unary operation relation that is the current logical target of
            ``self``.

        Returns
        -------
        commutator : `UnaryCommutator`
            A struct that either provides a version of ``current.operation``
            that can be applied to ``current.target`` after a
            possibly-modified version of ``self``, or an explanation of why
            this is impossible.  The default implementation always reports
            that commutation is impossible.

        Notes
        -----
        The `commute` implementations for the provided concrete
        `UnaryOperation` types assume that all unary operations preserve row
        order.  If this is not the case in an engine, that engine should
        either not implement `Engine.backtrack_unary` or take this into
        account itself when determining whether operations commute.
        """
        return UnaryCommutator(
            first=None,
            second=current.operation,
            done=False,
            messages=(f"{self} does not commute with anything",),
        )

    def simplify(self, upstream: UnaryOperation) -> UnaryOperation | None:
        """Return a simplified combination of this operation with another.

        Parameters
        ----------
        upstream : `UnaryOperation`
            Operation that acts immediately prior to ``self``.

        Returns
        -------
        simplified : `UnaryOperation` or `None`
            Operation that combines the action of ``upstream`` followed by
            ``self``, or `None` if no such combination is possible.  The
            default implementation always returns `None`.
        """
        return None
class RowFilter(UnaryOperation):
    """An extensible `UnaryOperation` subclass for operations that only remove
    rows from their target.
    """

    @final
    @property
    def is_count_invariant(self) -> Literal[False]:
        # Docstring inherited.
        # A property getter only ever receives ``self``, so it declares no
        # other parameters.
        return False

    @property
    @abstractmethod
    def is_order_dependent(self) -> bool:
        # Docstring inherited.
        raise NotImplementedError()

    @final
    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        # ``is_count_invariant`` is final and always `False` here, so the
        # only cases are: an already-possibly-empty target stays at zero, a
        # filter that cannot remove every row keeps at least one, and any
        # other filter may remove everything.
        if target.min_rows == 0:
            return 0
        return 1 if self.is_empty_invariant else 0
@final
@dataclasses.dataclass(frozen=True)
class Identity(UnaryOperation):
    """A concrete unary operation that does nothing.

    `Identity` operations never appear in relation trees; their `apply` method
    always just returns the target relation.

    Notes
    -----
    This is a field-less frozen dataclass, so all instances compare (and
    hash) equal to each other.
    """

    def __str__(self) -> str:
        return "identity"

    def _finish_apply(self, target: Relation) -> Relation:
        # Docstring inherited.
        # Never wrap the target: applying the identity yields the target
        # itself.
        return target

    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    def applied_max_rows(self, target: Relation) -> int | None:
        # Docstring inherited.
        return target.max_rows

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        # The identity commutes with everything: it moves upstream unchanged
        # and the existing operation is left exactly as it was.
        return UnaryCommutator(first=self, second=current.operation)

    def simplify(self, current: UnaryOperation) -> UnaryOperation:
        # Docstring inherited.
        # Any operation followed by the identity is just that operation.
        return current
class Reordering(UnaryOperation):
    """An extensible `UnaryOperation` subclass for operations that only reorder
    rows.

    Notes
    -----
    Everything about the resulting relation other than row order is fixed
    here as `~typing.final`: the columns and row-count bounds are exactly
    those of the target.
    """

    @final
    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @final
    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @final
    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    @final
    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    @final
    def applied_max_rows(self, target: Relation) -> int | None:
        # Docstring inherited.
        return target.max_rows
@dataclasses.dataclass
class UnaryCommutator:
    """A struct for the return value of `UnaryOperation.commute`."""

    first: UnaryOperation | None
    """The first operation to apply in the commuted sequence (`UnaryOperation`
    or `None`).

    When at least some commutation is possible, this is usually ``self`` (the
    operation `UnaryOperation.commute` was called on) or a modification
    thereof.  When it is `None`, either the commutation failed or the
    original operation will simplify away entirely (as indicated by
    ``done``).
    """

    second: UnaryOperation
    """The second operation to apply in the commuted sequence
    (`UnaryOperation`).

    When commutation is successful, this is a possibly-modified version of
    ``current.operation``, where ``current`` is the argument to
    `UnaryOperation.commute`.  When commutation is unsuccessful, this should
    be exactly ``current.operation``.
    """

    done: bool = True
    """Whether the commutation was fully successful (`bool`).

    When `False`, the original downstream operation (``self`` in the call to
    `commute`) must still be applied after `first` (if not `None`) and
    `second`.  While `first` is usually `None` in this case, `Projection`
    operations (and possibly some extension operations) can be partially
    commuted.
    """

    messages: tuple[str, ...] = dataclasses.field(default_factory=tuple)
    """Messages that describe why commutation either failed or only
    partially succeeded.
    """