Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py: 26%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "convertExpressionToSql",
26 "ExpressionTypeError",
27)
29import operator
30import warnings
31from abc import ABC, abstractmethod
32from datetime import datetime
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 Dict,
38 Iterable,
39 List,
40 Mapping,
41 Optional,
42 Tuple,
43 Type,
44 TypeVar,
45 Union,
46)
48import astropy.utils.exceptions
49import sqlalchemy
50from astropy.time import Time
51from lsst.utils.iteration import ensure_iterable
52from sqlalchemy.ext.compiler import compiles
53from sqlalchemy.sql.expression import func
55from ....core import (
56 Dimension,
57 DimensionElement,
58 DimensionUniverse,
59 NamedKeyMapping,
60 Timespan,
61 TimespanDatabaseRepresentation,
62 ddl,
63)
64from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId
65from .parser import Node, TreeVisitor
67# As of astropy 4.2, the erfa interface is shipped independently and
68# ErfaWarning is no longer an AstropyWarning
69try:
70 import erfa
71except ImportError:
72 erfa = None
74if TYPE_CHECKING: 74 ↛ 75line 74 didn't jump to line 75, because the condition on line 74 was never true
75 from .._structs import QueryColumns
def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression into a SQLAlchemy boolean clause.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query under
        construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in the
        expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    # Visiting the tree yields the converter for the root node; ``finish``
    # then checks that it is boolean and hands back the SQL expression.
    root = tree.visit(WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass))
    return root.finish(tree)
class ExpressionTypeError(TypeError):
    """Error signalling a type mismatch inside a query expression.

    Raised when the types of the operands do not fit the operator (or other
    syntax) they are used with.
    """
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Column element standing in for a TIMESTAMP column or literal inside
    a WHERE-clause expression.

    SQLite keeps timestamps as strings, and comparing those strings directly
    can sometimes give wrong answers; wrapping them in ``DATETIME()`` makes
    the comparison more reliable.  PostgreSQL behaves better when string
    literals carry an explicit ``TIMESTAMP`` prefix.

    Only expressions in the WHERE clause go through this mechanism.  Values
    of TIMESTAMP columns returned by queries still use the standard
    machinery and come back as `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`, optional
        Database column expression to wrap.
    literal : `datetime.datetime`, optional
        Literal value to wrap.

    Notes
    -----
    The dialect-specific compilers registered for this class assert that at
    least one of ``column`` and ``literal`` is set, and use ``column`` when
    both are.
    """

    def __init__(
        self,
        column: Optional[sqlalchemy.sql.ColumnElement] = None,
        literal: Optional[datetime] = None,
    ):
        super().__init__()
        self._literal = literal
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for SQLite.

    The wrapped column (or, failing that, the wrapped literal) is passed
    through SQLite's ``DATETIME()`` function so that timestamps are compared
    in a normalized form rather than as arbitrary strings.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled.
    compiler
        SQLAlchemy compiler used to render sub-expressions.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    # A column takes precedence; the literal is used only when there is no
    # column.
    if element._column is not None:
        operand = element._column
    else:
        operand = sqlalchemy.sql.literal(element._literal)
    return compiler.process(func.datetime(operand), **kw)
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for PostgreSQL.

    A wrapped column is compiled as-is.  A wrapped literal is rendered as
    its ISO-format string (space separator, microsecond precision) prefixed
    with the ``TIMESTAMP`` keyword.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled.
    compiler
        SQLAlchemy compiler used to render sub-expressions.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    # A column takes precedence; the literal is used only when there is no
    # column.
    if element._column is not None:
        return compiler.process(element._column, **kw)
    text = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(text), **kw)
class WhereClauseConverter(ABC):
    """Base interface for the objects that turn a butler query expression
    tree into SQLAlchemy expression objects.

    Instances are produced and consumed by `WhereClauseConverterVisitor`,
    which itself is created and used only by `convertExpressionToSql`.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete the conversion of this [boolean] expression and return
        the resulting SQLAlchemy expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.
            This base implementation raises unconditionally; subclasses that
            can represent a boolean should override it.
        """
        raise ExpressionTypeError(f'Expression "{node}" has type {self.dtype}, not bool.')

    @property
    @abstractmethod
    def dtype(self) -> type:
        """Python type of the expression tree node tied to this converter
        (`type`).

        For a literal or bind object this should be its exact type; for
        expressions that map to database entities or expressions it should
        be the type SQLAlchemy's converter mechanism produces when rows are
        returned from the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression as one element of the right-hand side of
        an IN expression.

        An implementation must do exactly one of:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            (start, stop, step) tuples describing ranges that the left-hand
            side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand of the IN expression.
            ``literals`` should only be appended to if
            ``self.dtype is dtype``, and ``ranges`` only if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of
            an IN expression, or if it is incompatible with the left-hand
            side type.
        """
        raise NotImplementedError()
class ScalarWhereClauseConverter(WhereClauseConverter):
    """Main `WhereClauseConverter` implementation, covering every expression
    that a single `sqlalchemy.sql.ColumnElement` can represent directly.

    Create instances through `fromExpression` or `fromLiteral`.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None`
        if it was not constructed from a literal.  Note that this is also
        `None` when this object corresponds to the literal `None`, in which
        case ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self._dtype = dtype
        self.value = value
        self.column = column

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of known type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``; its ``value`` is
            `None`.
        """
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``; its ``dtype`` is
            ``type(value)``.
        """
        dtype = type(value)
        if dtype is datetime:
            # datetime literals need dialect-specific rendering.
            return cls(_TimestampColumnElement(literal=value), value, dtype)
        # astropy times get an explicit column type; for everything else
        # SQLAlchemy infers the type from the value.
        sql_type = ddl.AstropyTimeNsecTai if dtype is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sql_type), value, dtype)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # The base class always raises; delegating avoids duplicating the
        # error message here.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f"{dtype.__name__}, but item has type {self.dtype.__name__}."
        )
class TimespanWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column
        expression (which may or may not be backed by multiple real
        columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Build a `Timespan` literal converter from its two bounds.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        bounds = (begin, end)
        for bound in bounds:
            assert bound.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A Time-typed converter without a value did not come from a literal
        # or bind value, so no Timespan literal can be built from it.
        if any(bound.value is None and bound.dtype is Time for bound in bounds):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        literal = Timespan(begin.value, end.value)
        return cls(TimespanReprClass.fromLiteral(literal))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build the boolean converter for "this timespan overlaps another
        timespan".

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        expression = self.timespan.overlaps(other.timespan)
        return ScalarWhereClauseConverter.fromExpression(expression, bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build the boolean converter for "this timespan contains a scalar
        time".

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand: the scalar time tested for containment.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean containment expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        expression = self.timespan.contains(other.column)
        return ScalarWhereClauseConverter.fromExpression(expression, bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        # Timespans are never valid as IN-list items.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            "Timespans are not allowed in this context."
        )
class RangeWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for integer range literals.

    A range literal is only valid on the right-hand side of an IN operation
    whose left-hand side operand has type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            ranges.append((self.start, self.stop, self.step))
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f"are only supported for int operands, not {dtype.__name__}."
        )
# Converter-level callable: takes one `WhereClauseConverter` and returns one.
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of unary-operation callables directly stored in `DispatchTable`.
"""

# Converter-level callable: takes two `WhereClauseConverter` operands
# (left-hand side first) and returns one.
BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of binary-operation callables directly stored in `DispatchTable`.
"""

# Column-level callable: takes one SQLAlchemy column expression and returns
# one.  `adaptUnaryColumnFunc` lifts these to `UnaryFunc`.
UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature for unary-operation callables that can work directly on SQLAlchemy
column expressions.
"""

# Column-level callable: takes two SQLAlchemy column expressions and returns
# one.  `adaptBinaryColumnFunc` lifts these to `BinaryFunc`.
BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement
]
"""Signature for binary-operation callables that can work directly on
SQLAlchemy column expressions.
"""

# Unconstrained type variable, used where a callable's type is passed through
# unchanged (e.g. by `adaptIdentity`).
_F = TypeVar("_F")
def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    Parameters
    ----------
    func
        Callable to register.
    result : `type` or `None`
        Ignored; accepted only so the signature matches the other adapters.

    Returns
    -------
    func
        The very same object that was passed in.
    """
    return func
def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that lifts a
    `UnaryColumnFunc` to a `UnaryFunc`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable that operates on a SQLAlchemy column expression.
    result : `type`
        Python type assigned to the converter returned by the adapted
        callable.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that requires its operand to be a
        `ScalarWhereClauseConverter`, applies ``func`` to the operand's
        column, and wraps the outcome in a new `ScalarWhereClauseConverter`.
    """

    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        column = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted
def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that lifts a
    `BinaryColumnFunc` to a `BinaryFunc`.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable that operates on two SQLAlchemy column expressions.
    result : `type`
        Python type assigned to the converter returned by the adapted
        callable.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that requires both operands to be
        `ScalarWhereClauseConverter` instances, applies ``func`` to their
        columns, and wraps the outcome in a new
        `ScalarWhereClauseConverter`.
    """

    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        column = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted
class TimeBinaryOperator:
    """Binary operation between two time operands, each of which may be of
    type `Time` or `datetime`.

    Instances are callable with the `BinaryFunc` signature.

    Parameters
    ----------
    operator : `Callable`
        Callable applied to the column expressions of the two operands
        (after any coercion to `datetime`).
    dtype : `type`
        Python type assigned to the resulting expression.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.dtype = dtype
        self.operator = operator

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        """Apply the operator to two scalar time operands.

        Both operands must be `ScalarWhereClauseConverter` instances; they
        are passed through `coerceTimes` before their columns are combined.
        """
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        left, right = self.coerceTimes(lhs, rhs)
        expression = self.operator(left.column, right.column)
        return ScalarWhereClauseConverter.fromExpression(expression, dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type
        if necessary.

        If any of the arguments has `datetime` type then all other arguments
        are converted to `datetime` type as well.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects, their type can be one of
            `Time` or `datetime`. If coercion happens, then `Time` objects
            can only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            List of converters in the same order as they appear in the
            argument list; some of them can be coerced to `datetime` type,
            non-coerced arguments are returned without any change.
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal."""
            if arg.dtype is datetime:
                return arg
            assert arg.value is not None, "Cannot coerce non-literals"
            assert arg.dtype is Time, "Cannot coerce non-Time literals"
            # Silence astropy (and, when importable, erfa) warnings raised
            # during the conversion.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                if erfa is not None:
                    warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                dt = arg.value.to_datetime()
            return ScalarWhereClauseConverter.fromLiteral(dt)

        if not any(arg.dtype is datetime for arg in args):
            # No datetime operand present: nothing needs converting.
            return list(args)
        return [_coerce(arg) for arg in args]
class DispatchTable:
    """An object that manages unary- and binary-operator type-dispatch tables
    for `WhereClauseConverter`.

    Notes
    -----
    Most of the machinery in this class (and in the function-signature type
    aliases that precede it) exists to keep the dispatch rules in the `build`
    method concise and easy to read, because that is where all of the
    important high-level logic lives.

    Double-dispatch is awkward in Python, as in most languages; the
    traditional visitor-pattern approach was tried here first and made the
    actual behavior much harder to see.
    """

    def __init__(self) -> None:
        # Keyed by (operator, operand type).
        self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
        # Keyed by (operator, lhs type, rhs type).
        self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}

    def registerUnary(
        self,
        operator: str,
        operand: Union[type, Iterable[type]],
        func: _F,
        *,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a unary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.  Unary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyUnary`.
        operand : `type` or `Iterable` [ `type` ]
            Type or types for which this operation is implemented by the
            given ``func``.
        func : `Callable`
            Callable that implements the unary operation.  If
            ``adapt is True``, this should be a `UnaryColumnFunc`.  If
            ``adapt is False``, this should be a `UnaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided, the type of the operand is assumed.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and
            ``result`` (the second argument), returning a new callable with
            the signature of `UnaryFunc`.  `True` (default) and `False`
            invoke a default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptUnaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for item in ensure_iterable(operand):
            self._unary[operator, item] = adapt(func, result if result is not None else item)

    def registerBinary(
        self,
        operator: str,
        lhs: Union[type, Iterable[type]],
        func: _F,
        *,
        rhs: Optional[Union[type, Iterable[type]]] = None,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a binary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.
            Binary operations that are not mapped to operators may use their
            own arbitrary strings, as long as these are used consistently in
            `build` and `applyBinary`.
        lhs : `type` or `Iterable` [ `type` ]
            Left-hand side type or types for which this operation is
            implemented by the given ``func``.
        func : `Callable`
            Callable that implements the binary operation.  If
            ``adapt is True``, this should be a `BinaryColumnFunc`.  If
            ``adapt is False``, this should be a `BinaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        rhs : `type` or `Iterable` [ `type` ]
            Right-hand side type or types for which this operation is
            implemented by the given ``func``.  If multiple types, all
            combinations of ``lhs`` and ``rhs`` are registered.  If not
            provided, each element of ``lhs`` is assumed to be paired with
            itself, but mixed-type combinations are not registered.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided and ``rhs`` is also not provided, the type of the
            operand (``lhs``) is assumed.  If not provided and ``rhs`` *is*
            provided, then ``result=None`` will be forwarded to ``adapt``.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and
            ``result`` (the second argument), returning a new callable with
            the signature of `BinaryFunc`.  `True` (default) and `False`
            invoke a default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptBinaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for lh in ensure_iterable(lhs):
            if rhs is None:
                # Homogeneous operation: pair each type only with itself.
                self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh)
            else:
                for rh in ensure_iterable(rhs):
                    self._binary[operator, lh, rh] = adapt(func, result)

    def applyUnary(
        self,
        operator: str,
        operand: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered unary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        operand : `WhereClauseConverter`
            Operand, with ``operand.dtype`` and ``operator`` used to look up
            the appropriate function.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._unary[operator, operand.dtype](operand)

    def applyBinary(
        self,
        operator: str,
        lhs: WhereClauseConverter,
        rhs: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered binary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        lhs : `WhereClauseConverter`
            Left-hand side operand.
        rhs : `WhereClauseConverter`
            Right-hand side operand.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs)

    @classmethod
    def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
        """Construct a table populated with all supported operations.

        Parameters
        ----------
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database; used to build `Timespan` literals for the
            ``PAIR`` operation.

        Returns
        -------
        table : `DispatchTable`
            New instance of ``cls`` with every dispatch rule registered.
        """
        table = cls()

        # Standard scalar unary and binary operators: just delegate to
        # SQLAlchemy operators.
        table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
        table.registerUnary("+", (int, float), operator.__pos__)
        table.registerUnary("-", (int, float), operator.__neg__)
        table.registerBinary("AND", bool, sqlalchemy.sql.and_)
        table.registerBinary("OR", bool, sqlalchemy.sql.or_)

        comparisons = {
            "=": operator.__eq__,
            "!=": operator.__ne__,
            "<": operator.__lt__,
            ">": operator.__gt__,
            "<=": operator.__le__,
            ">=": operator.__ge__,
        }
        for symbol, compare in comparisons.items():
            # Same-type comparisons between plain scalars.
            table.registerBinary(symbol, (int, float, str), compare, result=bool)
            # Comparisons between any mix of Time and datetime operands;
            # TimeBinaryOperator coerces to datetime when needed.
            table.registerBinary(
                symbol,
                (Time, datetime),
                TimeBinaryOperator(compare, bool),
                rhs=(Time, datetime),
                adapt=False,
            )

        arithmetic = {
            "+": operator.__add__,
            "-": operator.__sub__,
            "*": operator.__mul__,
            "/": operator.__truediv__,
            "%": operator.__mod__,
        }
        for symbol, compute in arithmetic.items():
            table.registerBinary(symbol, (int, float), compute)

        # (In)equality against None maps to the ``is_`` / ``is_not`` column
        # operators, with None allowed on either side.
        nullable = (int, float, str, Time, type(None))
        null_checks = (
            ("=", sqlalchemy.sql.expression.ColumnOperators.is_),
            ("!=", sqlalchemy.sql.expression.ColumnOperators.is_not),
        )
        for symbol, check in null_checks:
            table.registerBinary(symbol, lhs=nullable, rhs=(type(None),), func=check, result=bool)
            table.registerBinary(symbol, lhs=(type(None),), rhs=nullable, func=check, result=bool)

        # Construct Timespan literals from 2-element tuples (A, B), where A
        # and B are each either Time or None.
        table.registerBinary(
            "PAIR",
            lhs=(Time, type(None)),
            rhs=(Time, type(None)),
            func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
            adapt=False,
        )

        # Less-than and greater-than between Timespans.
        table.registerBinary(
            "<",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool),
            adapt=False,
        )

        # Less-than and greater-than between Timespans and Times.  With the
        # Time on the left, the comparison is written from the Timespan's
        # side with the operator reversed.
        table.registerBinary(
            "<",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            "<",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool),
            adapt=False,
        )

        # OVERLAPS operator between Timespans.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            func=TimespanWhereClauseConverter.overlaps,
            adapt=False,
        )
        # OVERLAPS operator between Timespans and Time is equivalent to
        # "contains", but expression language only has OVERLAPS to keep it
        # simple.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            rhs=Time,
            func=TimespanWhereClauseConverter.contains,
            adapt=False,
        )
        table.registerBinary(
            "OVERLAPS",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: TimespanWhereClauseConverter.contains(b, a),
            adapt=False,
        )
        return table
class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """

    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Operator dispatch table, built once per visitor for the given
        # timespan representation.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so that
        # integer-looking literals keep an integer type.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values first, then expression constants,
        # then dimension elements / dimension record columns.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(column=self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                # The timespan column only exists for temporal elements.
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column was named, so the identifier refers to the element
            # itself; use its key column.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element), element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # The KeyError from the dispatch lookup is an implementation
            # detail; report a type error without chaining it.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            raise ExpressionTypeError(
                f"Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary "
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".')
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # Exact number of values the range expands to (stop is
            # inclusive); this is the same range() used for the expansion
            # below, so the limit check cannot under- or over-count.
            count = len(range(start, stop + 1, stride))
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.column.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(item) for item in range(start, stop + 1, stride)]
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.
        if clauses:
            expr = sqlalchemy.sql.or_(*clauses)
        else:
            # Nothing to match against (e.g. only empty ranges were given):
            # membership in an empty set is false. Calling or_() with no
            # arguments is deprecated in SQLAlchemy and would not express
            # that.
            expr = sqlalchemy.sql.expression.false()
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        # Parentheses only affect parsing; the already-converted inner
        # expression is passed through unchanged.
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        # Only 2-element tuples are recognized; they are dispatched as the
        # "PAIR" binary operation.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError, consistent with visitUnaryOp
            # and visitBinaryOp.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")