Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py: 29%
319 statements
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "convertExpressionToSql",
    "ExpressionTypeError",
)

import operator
import warnings
from abc import ABC, abstractmethod
from datetime import datetime
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import astropy.utils.exceptions
import sqlalchemy
from astropy.time import Time
from lsst.utils.iteration import ensure_iterable
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import func
from sqlalchemy.sql.visitors import InternalTraversal

from ....core import (
    Dimension,
    DimensionElement,
    DimensionUniverse,
    NamedKeyMapping,
    Timespan,
    TimespanDatabaseRepresentation,
    ddl,
)
from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId
from .parser import Node, TreeVisitor

# As of astropy 4.2, the erfa interface is shipped independently and
# ErfaWarning is no longer an AstropyWarning.  The package is optional here:
# when it cannot be imported ``erfa`` is set to `None` and code that wants to
# silence ``ErfaWarning`` must check for that first.
try:
    import erfa
except ImportError:
    erfa = None

# Imported for type annotations only; annotations are lazy strings in this
# module thanks to ``from __future__ import annotations``.
if TYPE_CHECKING:
    from .._structs import QueryColumns
def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Convert a query expression tree into a SQLAlchemy expression object.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in the
        expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    visitor = WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    # Visiting the tree yields a converter for the root node; ``finish``
    # then checks that the root is boolean and extracts the SQL expression.
    converter = tree.visit(visitor)
    return converter.finish(tree)
class ExpressionTypeError(TypeError):
    """Exception raised when the types in a query expression are not
    compatible with the operators or other syntax.
    """
class _TimestampLiteral(sqlalchemy.sql.ColumnElement):
    """Special ColumnElement type used for TIMESTAMP literals in expressions.

    SQLite stores timestamps as strings which sometimes can cause issues when
    comparing strings. For more reliable comparison SQLite needs DATETIME()
    wrapper for those strings. For PostgreSQL it works better if we add
    TIMESTAMP to string literals.

    Parameters
    ----------
    literal : `datetime.datetime`
        The timestamp value this element renders; the dialect-specific
        rendering is supplied by the ``@compiles`` hooks registered for this
        class.
    """

    inherit_cache = True
    # Tell SQLAlchemy's traversal/caching machinery which attribute holds the
    # state of this element.
    _traverse_internals = [("_literal", InternalTraversal.dp_plain_obj)]

    def __init__(self, literal: datetime):
        super().__init__()
        self._literal = literal
@compiles(_TimestampLiteral, "sqlite")
def compile_timestamp_literal_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compilation of TIMESTAMP literal for SQLite.

    The literal is wrapped in SQLite's ``datetime`` function, which
    normalizes a time value to a ``YYYY-MM-DD HH:MM:SS`` text timestamp so
    that it compares reliably against other values wrapped the same way.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        SQL text for the wrapped literal.
    """
    return compiler.process(func.datetime(sqlalchemy.sql.literal(element._literal)), **kw)
@compiles(_TimestampLiteral, "postgresql")
def compile_timestamp_literal_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compilation of TIMESTAMP literal for PostgreSQL.

    For PostgreSQL it works better if we add TIMESTAMP to string literals.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        SQL text of the form ``TIMESTAMP <literal>``.
    """
    # Render the datetime as an ISO string with a space separator and
    # microsecond precision, then prefix the compiled string literal.
    literal = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(literal), **kw)
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Special ColumnElement type used for TIMESTAMP columns or in expressions.

    SQLite stores timestamps as strings which sometimes can cause issues when
    comparing strings. For more reliable comparison SQLite needs DATETIME()
    wrapper for columns.

    This mechanism is only used for expressions in WHERE clause, values of the
    TIMESTAMP columns returned from queries are still handled by standard
    mechanism and they are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The column (or column expression) to wrap.
    """

    inherit_cache = True
    # Tell SQLAlchemy's traversal/caching machinery that the wrapped column
    # is a nested clause element.
    _traverse_internals = [("_column", InternalTraversal.dp_clauseelement)]

    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compilation of TIMESTAMP column for SQLite.

    The column is wrapped in SQLite's ``datetime`` function, which
    normalizes a time value to a ``YYYY-MM-DD HH:MM:SS`` text timestamp so
    that it compares reliably against literals wrapped the same way.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        SQL text for the wrapped column.
    """
    return compiler.process(func.datetime(element._column), **kw)
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compilation of TIMESTAMP column for PostgreSQL.

    No wrapper is added for this dialect; the wrapped column is compiled
    as-is.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        SQL text for the wrapped column.
    """
    return compiler.process(element._column, **kw)
class WhereClauseConverter(ABC):
    """Abstract base class for the objects used to transform a butler query
    expression tree into SQLAlchemy expression objects.

    WhereClauseConverter instances are created and consumed by
    `WhereClauseConverterVisitor`, which is in turn created and used only by
    the `convertExpressionToSql` function.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Finish converting this [boolean] expression, returning a SQLAlchemy
        expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.  The
            default implementation always raises this exception; subclasses
            that may actually represent a boolean should override.
        """
        raise ExpressionTypeError(f'Expression "{node}" has type {self.dtype}, not bool.')

    @property
    @abstractmethod
    def dtype(self) -> type:
        """The Python type of the expression tree node associated with this
        converter (`type`).

        This should be the exact type of any literal or bind object, and the
        type produced by SQLAlchemy's converter mechanism when returning rows
        from the database in the case of expressions that map to database
        entities or expressions.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Visit this expression when it appears as an element in the
        right-hand side of an IN expression.

        Implementations must either:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            List of SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            List of (start, stop, step) tuples that represent ranges that the
            left-hand side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand for the IN expression.
            Literals should only be appended to if ``self.dtype is dtype``,
            and ``ranges`` should only be appended to if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of an
            IN expression, or if it is incompatible with the left-hand side
            type.
        """
        raise NotImplementedError()
class ScalarWhereClauseConverter(WhereClauseConverter):
    """Primary implementation of WhereClauseConverter, for expressions that can
    always be represented directly by a single `sqlalchemy.sql.ColumnElement`
    instance.

    Should be constructed by calling either `fromExpression` or `fromLiteral`.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None` if
        it was not constructed from a literal.  Note that this is also `None`
        if this object corresponds to the literal `None`, in which case
        ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self.column = column
        self.value = value
        self._dtype = dtype

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Construct from an existing SQLAlchemy column expression and type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``; its ``value`` is
            `None` because it does not originate from a literal.
        """
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Construct from a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``; its ``dtype`` is
            ``type(value)``.
        """
        dtype = type(value)
        if dtype is datetime:
            # datetime literals need dialect-specific rendering.
            column = _TimestampLiteral(value)
        else:
            # astropy Time literals need an explicit column type; everything
            # else lets SQLAlchemy infer the type from the value.
            column = sqlalchemy.sql.literal(value, type_=ddl.AstropyTimeNsecTai if dtype is Time else None)
        return cls(column, value, dtype)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is not bool:
            return super().finish(node)  # will raise; just avoids duplicate error message
        return self.column

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is not self.dtype:
            raise ExpressionTypeError(
                f'Error in IN expression "{node}": left hand side has type '
                f"{dtype.__name__}, but item has type {self.dtype.__name__}."
            )
        literals.append(self.column)
class TimespanWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverter for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column expression
        (which may or may not be backed by multiple real columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Construct from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        assert begin.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        assert end.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A Time-typed converter without a value did not come from a literal
        # or bind value, so no Timespan literal can be built from it.
        if (begin.value is None and begin.dtype is Time) or (end.value is None and end.dtype is Time):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        return cls(TimespanReprClass.fromLiteral(Timespan(begin.value, end.value)))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents the overlap
        of this timespan with another.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.overlaps(other.timespan), bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents whether
        this timespan contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the contains operation; must have type
            `astropy.time.Time`.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean contains expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.contains(other.column), bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            "Timespans are not allowed in this context."
        )
class RangeWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverters for integer range literals.

    Range literals may only appear on the right-hand side of IN operations
    where the left-hand side operand is of type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start = start
        self.stop = stop
        self.step = step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is not int:
            raise ExpressionTypeError(
                f'Error in IN expression "{node}": range expressions '
                f"are only supported for int operands, not {dtype.__name__}."
            )
        ranges.append((self.start, self.stop, self.step))
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of unary-operation callables directly stored in `DispatchTable`.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of binary-operation callables directly stored in `DispatchTable`.
"""

UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature for unary-operation callables that can work directly on SQLAlchemy
column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement
]
"""Signature for binary-operation callables that can work directly on
SQLAlchemy column expressions.
"""

# Generic placeholder for "whatever callable type the adapter accepts".
_F = TypeVar("_F")
def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """An adapter function for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary` that just returns the original function.

    Parameters
    ----------
    func : `Callable`
        Function to register; returned unchanged.
    result : `type` or `None`
        Ignored; accepted only so the signature matches the other adapters.

    Returns
    -------
    func : `Callable`
        The very same object that was passed in.
    """
    return func
def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """An adapter function for `DispatchTable.registerUnary` that converts a
    `UnaryColumnFunc` into a `UnaryFunc`, requiring the operand to be a
    `ScalarWhereClauseConverter`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable that operates directly on a SQLAlchemy column expression.
    result : `type`
        Python type of the expression produced by ``func``.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that unwraps the operand's column, applies ``func``, and
        wraps the outcome in a new `ScalarWhereClauseConverter`.
    """

    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        return ScalarWhereClauseConverter.fromExpression(func(operand.column), dtype=result)

    return adapted
def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """An adapter function for `DispatchTable.registerBinary` that converts a
    `BinaryColumnFunc` into a `BinaryFunc`, requiring the operands to be
    `ScalarWhereClauseConverter` instances.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable that operates directly on two SQLAlchemy column expressions.
    result : `type`
        Python type of the expression produced by ``func``.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that unwraps both operands' columns, applies ``func``, and
        wraps the outcome in a new `ScalarWhereClauseConverter`.
    """

    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        return ScalarWhereClauseConverter.fromExpression(func(lhs.column, rhs.column), dtype=result)

    return adapted
class TimeBinaryOperator:
    """Callable that applies a binary column operator to two time operands,
    coercing them to a common representation first.

    Instances have the `BinaryFunc` signature and are registered in
    `DispatchTable` without any adapter.

    Parameters
    ----------
    operator : `Callable`
        Binary callable applied to the two (possibly coerced) SQLAlchemy
        column expressions.
    dtype : `type`
        Python type of the expression produced by ``operator``.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        """Apply the operator to two scalar time converters.

        Parameters
        ----------
        lhs : `WhereClauseConverter`
            Left-hand side operand; must be a `ScalarWhereClauseConverter`.
        rhs : `WhereClauseConverter`
            Right-hand side operand; must be a `ScalarWhereClauseConverter`.

        Returns
        -------
        expression : `ScalarWhereClauseConverter`
            Converter wrapping the resulting column expression.
        """
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        operands = [arg.column for arg in self.coerceTimes(lhs, rhs)]
        return ScalarWhereClauseConverter.fromExpression(self.operator(*operands), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type if
        necessary.

        If any of the arguments has `datetime` type then all other arguments
        are converted to `datetime` type as well.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects, their type can be one of
            `Time` or `datetime`.  If coercion happens, then `Time` objects
            can only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            List of converters in the same order as they appear in argument
            list, some of them can be coerced to `datetime` type, non-coerced
            arguments are returned without any change.
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal."""
            if arg.dtype is not datetime:
                assert arg.value is not None, "Cannot coerce non-literals"
                assert arg.dtype is Time, "Cannot coerce non-Time literals"
                # Silence astropy (and, when available, erfa) warnings that
                # the conversion to datetime may emit.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                    if erfa is not None:
                        warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                    dt = arg.value.to_datetime()
                arg = ScalarWhereClauseConverter.fromLiteral(dt)
            return arg

        if any(arg.dtype is datetime for arg in args):
            return [_coerce(arg) for arg in args]
        else:
            return list(args)
class DispatchTable:
    """An object that manages unary- and binary-operator type-dispatch tables
    for `WhereClauseConverter`.

    Notes
    -----
    A lot of the machinery in this class (and in the preceding function
    signature type aliases) is all in service of making the actual dispatch
    rules in the `build` method concise and easy to read, because that's where
    all of the important high-level logic lives.

    Double-dispatch is a pain in Python, as it is in most languages; it's worth
    noting that I first tried the traditional visitor-pattern approach here,
    and it was *definitely* much harder to see the actual behavior.
    """

    def __init__(self) -> None:
        # Keys are (operator, operand type) and (operator, lhs type, rhs type).
        self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
        self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}

    def registerUnary(
        self,
        operator: str,
        operand: Union[type, Iterable[type]],
        func: _F,
        *,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a unary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.  Unary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyUnary`.
        operand : `type` or `Iterable` [ `type` ]
            Type or types for which this operation is implemented by the given
            ``func``.
        func : `Callable`
            Callable that implements the unary operation.  If
            ``adapt is True``, this should be a `UnaryColumnFunc`.  If
            ``adapt is False``, this should be a `UnaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided, the type of the operand is assumed.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and ``result``
            (the second argument), returning a new callable with the
            signature of `UnaryFunc`.  `True` (default) and `False` invoke a
            default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptUnaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for item in ensure_iterable(operand):
            self._unary[operator, item] = adapt(func, result if result is not None else item)

    def registerBinary(
        self,
        operator: str,
        lhs: Union[type, Iterable[type]],
        func: _F,
        *,
        rhs: Optional[Union[type, Iterable[type]]] = None,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a binary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.  Binary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyBinary`.
        lhs : `type` or `Iterable` [ `type` ]
            Left-hand side type or types for which this operation is
            implemented by the given ``func``.
        func : `Callable`
            Callable that implements the binary operation.  If
            ``adapt is True``, this should be a `BinaryColumnFunc`.  If
            ``adapt is False``, this should be a `BinaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        rhs : `type` or `Iterable` [ `type` ]
            Right-hand side type or types for which this operation is
            implemented by the given ``func``.  If multiple types, all
            combinations of ``lhs`` and ``rhs`` are registered.  If not
            provided, each element of ``lhs`` is assumed to be paired with
            itself, but mixed-type combinations are not registered.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided and ``rhs`` is also not provided, the type of the operand
            (``lhs``) is assumed.  If not provided and ``rhs`` *is* provided,
            then ``result=None`` will be forwarded to ``adapt``.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and ``result``
            (the second argument), returning a new callable with the
            signature of `BinaryFunc`.  `True` (default) and `False` invoke a
            default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptBinaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for lh in ensure_iterable(lhs):
            if rhs is None:
                # Same-type pairing only; result defaults to the operand type.
                self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh)
            else:
                for rh in ensure_iterable(rhs):
                    self._binary[operator, lh, rh] = adapt(func, result)

    def applyUnary(
        self,
        operator: str,
        operand: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered unary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        operand : `WhereClauseConverter`
            Operand, with ``operand.dtype`` and ``operator`` used to look up
            the appropriate function.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._unary[operator, operand.dtype](operand)

    def applyBinary(
        self,
        operator: str,
        lhs: WhereClauseConverter,
        rhs: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered binary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        lhs : `WhereClauseConverter`
            Left-hand side operand.
        rhs : `WhereClauseConverter`
            Right-hand side operand.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs)

    @classmethod
    def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
        """Construct a table populated with every operation of the query
        expression language.

        Parameters
        ----------
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database; used when constructing `Timespan` literals from
            ``(begin, end)`` pairs.

        Returns
        -------
        table : `DispatchTable`
            New table with all unary and binary operations registered.
        """
        table = DispatchTable()
        # Standard scalar unary and binary operators: just delegate to
        # SQLAlchemy operators.
        table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
        table.registerUnary("+", (int, float), operator.__pos__)
        table.registerUnary("-", (int, float), operator.__neg__)
        table.registerBinary("AND", bool, sqlalchemy.sql.and_)
        table.registerBinary("OR", bool, sqlalchemy.sql.or_)
        # Comparisons are registered for same-type scalar operands, and for
        # every Time/datetime combination (with coercion handled by
        # TimeBinaryOperator).
        comparisons = {
            "=": operator.__eq__,
            "!=": operator.__ne__,
            "<": operator.__lt__,
            ">": operator.__gt__,
            "<=": operator.__le__,
            ">=": operator.__ge__,
        }
        for symbol, compare in comparisons.items():
            table.registerBinary(symbol, (int, float, str), compare, result=bool)
            table.registerBinary(
                symbol,
                (Time, datetime),
                TimeBinaryOperator(compare, bool),
                rhs=(Time, datetime),
                adapt=False,
            )
        # Arithmetic on same-type numeric operands; result has operand type.
        arithmetic = {
            "+": operator.__add__,
            "-": operator.__sub__,
            "*": operator.__mul__,
            "/": operator.__truediv__,
            "%": operator.__mod__,
        }
        for symbol, compute in arithmetic.items():
            table.registerBinary(symbol, (int, float), compute)
        # Equality and inequality against None map to IS / IS NOT, with None
        # allowed on either side.
        nullable = (int, float, str, Time, type(None))
        null_tests = (
            ("=", sqlalchemy.sql.expression.ColumnOperators.is_),
            ("!=", sqlalchemy.sql.expression.ColumnOperators.is_not),
        )
        for symbol, null_test in null_tests:
            table.registerBinary(
                symbol,
                lhs=nullable,
                rhs=(type(None),),
                func=null_test,
                result=bool,
            )
            table.registerBinary(
                symbol,
                lhs=(type(None),),
                rhs=nullable,
                func=null_test,
                result=bool,
            )
        # Construct Timespan literals from 2-element tuples (A, B), where A and
        # B are each either Time or None.
        table.registerBinary(
            "PAIR",
            lhs=(Time, type(None)),
            rhs=(Time, type(None)),
            func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
            adapt=False,
        )
        # Less-than and greater-than between Timespans.
        table.registerBinary(
            "<",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool),
            adapt=False,
        )
        # Less-than and greater-than between Timespans and Times.
        table.registerBinary(
            "<",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool),
            adapt=False,
        )
        # With the Time on the left the comparison is flipped so that the
        # Timespan representation is always the object being asked.
        table.registerBinary(
            "<",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool),
            adapt=False,
        )
        # OVERLAPS operator between Timespans.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            func=TimespanWhereClauseConverter.overlaps,
            adapt=False,
        )
        # OVERLAPS operator between Timespans and Time is equivalent to
        # "contains", but expression language only has OVERLAPS to keep it
        # simple.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            rhs=Time,
            func=TimespanWhereClauseConverter.contains,
            adapt=False,
        )
        table.registerBinary(
            "OVERLAPS",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: TimespanWhereClauseConverter.contains(b, a),
            adapt=False,
        )
        return table
class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """

    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Table of unary/binary operator implementations, looked up by
        # operator name and operand types in the visit* methods below.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so that
        # integer-looking literals keep an integer type.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values first, then expression constants,
        # and finally dimension elements / dimension element columns.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                # Timespan columns are tracked separately from regular
                # dimension record fields.
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column given: the identifier refers to the key of a
            # dimension.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element), element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        #
        # Note that the ``operator`` parameter shadows the `operator` module
        # inside this method; the name is part of the method signature.
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # The dispatch lookup failure is an implementation detail, so
            # the KeyError is not chained to the user-facing error.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        #
        # Note that the ``operator`` parameter shadows the `operator` module
        # inside this method; the name is part of the method signature.
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            # The dispatch lookup failure is an implementation detail, so
            # the KeyError is not chained to the user-facing error.
            raise ExpressionTypeError(
                f"Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary "
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".')
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # `stop` is inclusive.  Using the range object gives the exact
            # number of values the range expands to; a floor division of the
            # span by the stride undercounts (e.g. 0..2:2 has two values).
            expanded = range(start, stop + 1, stride)
            if len(literals) + len(expanded) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                range_expr = lhs.column.between(start, stop)
                if stride != 1:
                    range_expr = sqlalchemy.sql.and_(range_expr, (lhs.column % stride) == (start % stride))
                clauses.append(range_expr)
            else:
                # add all values to literal list
                literals += [sqlalchemy.sql.literal(item) for item in expanded]
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.  Seeding with false() keeps the
        # result well-defined when there are no clauses at all (calling
        # or_() without arguments is deprecated in SQLAlchemy); false() is
        # dropped from the OR whenever any real clause is present.
        expr = sqlalchemy.sql.or_(sqlalchemy.sql.false(), *clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        #
        # Only 2-element tuples are recognized; they are dispatched as the
        # "PAIR" binary operation.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError, consistent with visitUnaryOp
            # and visitBinaryOp.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")