Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "convertExpressionToSql",
26 "ExpressionTypeError",
27)
29from abc import ABC, abstractmethod
30from datetime import datetime
31import operator
32from typing import (
33 Any,
34 Callable,
35 Dict,
36 Iterable,
37 List,
38 Mapping,
39 Optional,
40 Tuple,
41 Type,
42 TYPE_CHECKING,
43 TypeVar,
44 Union,
45)
46import warnings
48from astropy.time import Time
49import astropy.utils.exceptions
50import sqlalchemy
51from sqlalchemy.ext.compiler import compiles
52from sqlalchemy.sql.expression import func
54from lsst.utils.iteration import ensure_iterable
55from ....core import (
56 ddl,
57 Dimension,
58 DimensionElement,
59 DimensionUniverse,
60 NamedKeyMapping,
61 Timespan,
62 TimespanDatabaseRepresentation,
63)
64from .parser import Node, TreeVisitor
65from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant
67# As of astropy 4.2, the erfa interface is shipped independently and
68# ErfaWarning is no longer an AstropyWarning
69try:
70 import erfa
71except ImportError:
72 erfa = None
74if TYPE_CHECKING: 74 ↛ 75line 74 didn't jump to line 75, because the condition on line 74 was never true
75 from .._structs import QueryColumns
def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression tree into a SQLAlchemy expression.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in the
        expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    treeVisitor = WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    # The converter produced for the root node is responsible for checking
    # that the expression as a whole is boolean.
    return tree.visit(treeVisitor).finish(tree)
class ExpressionTypeError(TypeError):
    """Error signalling a type mismatch inside a query expression.

    Raised when the types of the operands do not fit the operator (or other
    syntax) they are used with.
    """
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """ColumnElement wrapper for TIMESTAMP columns or literals that appear in
    expressions.

    SQLite keeps timestamps as strings, and comparing those strings directly
    is not always reliable; wrapping them in DATETIME() makes the comparison
    dependable. PostgreSQL behaves better when string literals are prefixed
    with TIMESTAMP.

    Only WHERE-clause expressions go through this mechanism. Values of
    TIMESTAMP columns returned from queries still use the standard machinery
    and come back as `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`, optional
        Column expression to wrap.
    literal : `datetime`, optional
        Literal value to wrap; consulted by the compilers only when
        ``column`` is `None`.
    """

    def __init__(
        self,
        column: Optional[sqlalchemy.sql.ColumnElement] = None,
        literal: Optional[datetime] = None,
    ):
        super().__init__()
        self._column, self._literal = column, literal
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for SQLite.

    The wrapped column (or, when there is no column, the wrapped literal) is
    passed through SQLite's ``datetime()`` function so that both sides of a
    comparison go through the same conversion.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; exactly one of its column or literal is
        expected to be set.
    compiler
        SQLAlchemy compiler object used to render the nested expression.
    **kw
        Additional keyword arguments forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    if element._column is not None:
        operand = element._column
    else:
        # Literals become bind parameters that are wrapped the same way.
        operand = sqlalchemy.sql.literal(element._literal)
    return compiler.process(func.datetime(operand), **kw)
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for PostgreSQL.

    A wrapped column is rendered unchanged. A wrapped literal is formatted
    as an ISO string (space separator, microsecond precision) and rendered
    as a bound string prefixed with the ``TIMESTAMP`` keyword.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; exactly one of its column or literal is
        expected to be set.
    compiler
        SQLAlchemy compiler object used to render the nested expression.
    **kw
        Additional keyword arguments forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    if element._column is not None:
        return compiler.process(element._column, **kw)
    isoString = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(isoString), **kw)
class WhereClauseConverter(ABC):
    """Abstract base for the objects that turn a butler query expression
    tree into SQLAlchemy expression objects.

    Instances are created and consumed by `WhereClauseConverterVisitor`,
    which in turn is created and used only by `convertExpressionToSql`.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete the conversion of this [boolean] expression and return
        the SQLAlchemy expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.
            This base implementation raises unconditionally; subclasses that
            can represent a boolean should override it.
        """
        raise ExpressionTypeError(f'Expression "{node}" has type {self.dtype}, not bool.')

    @property
    @abstractmethod
    def dtype(self) -> type:
        """The Python type of the expression tree node associated with this
        converter (`type`).

        For a literal or bind object this should be its exact type; for
        expressions that map to database entities or expressions it should
        be the type SQLAlchemy's converter mechanism produces when returning
        rows from the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression as one element of the right-hand side of
        an IN expression.

        Implementations must do one of:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            (start, stop, step) tuples describing ranges that the left-hand
            side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand of the IN expression.
            ``literals`` should only be appended to if
            ``self.dtype is dtype``, and ``ranges`` only if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of
            an IN expression, or if it is incompatible with the left-hand
            side type.
        """
        raise NotImplementedError()
class ScalarWhereClauseConverter(WhereClauseConverter):
    """Main `WhereClauseConverter` implementation, used for expressions that
    a single `sqlalchemy.sql.ColumnElement` can always represent directly.

    Instances should be created through `fromExpression` or `fromLiteral`.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was built from, or `None` if it
        was not built from a literal. It is also `None` when this object
        corresponds to the literal `None`, in which case
        ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self.column = column
        self.value = value
        self._dtype = dtype

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of a known type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``.
        """
        # No literal is associated with a pre-built expression.
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``.
        """
        literalType = type(value)
        if literalType is datetime:
            # datetime literals need the dialect-specific timestamp wrapper.
            return cls(_TimestampColumnElement(literal=value), value, literalType)
        # astropy times get an explicit column type; everything else lets
        # SQLAlchemy infer it.
        sqlType = ddl.AstropyTimeNsecTai if literalType is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sqlType), value, literalType)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # Delegating to the base class always raises; doing so keeps the
        # error message defined in one place.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f'{dtype.__name__}, but item has type {self.dtype.__name__}.'
        )
class TimespanWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column
        expression (which may or may not be backed by multiple real
        columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Build a timespan converter from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        bounds = (begin, end)
        for bound in bounds:
            assert bound.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        for bound in bounds:
            # A Time-typed converter without a value is a column or other
            # expression, which cannot be turned into a Timespan literal.
            if bound.dtype is Time and bound.value is None:
                raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        literal = Timespan(begin.value, end.value)
        return cls(TimespanReprClass.fromLiteral(literal))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression for the overlap of this
        timespan with another one.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        overlapExpression = self.timespan.overlaps(other.timespan)
        return ScalarWhereClauseConverter.fromExpression(overlapExpression, bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression that tells whether this
        timespan contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand: the time to test for containment.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean containment expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        containsExpression = self.timespan.contains(other.column)
        return ScalarWhereClauseConverter.fromExpression(containsExpression, bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            'Timespans are not allowed in this context.'
        )
class RangeWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for integer range literals.

    A range literal is only valid on the right-hand side of an IN operation
    whose left-hand side operand has type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            ranges.append((self.start, self.stop, self.step))
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f'are only supported for int operands, not {dtype.__name__}.'
        )
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of the unary-operation callables that `DispatchTable` stores
directly.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of the binary-operation callables that `DispatchTable` stores
directly.
"""

UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature of unary-operation callables that operate directly on SQLAlchemy
column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Signature of binary-operation callables that operate directly on
SQLAlchemy column expressions.
"""

# Generic placeholder for "whatever callable type was passed in".
_F = TypeVar("_F")
def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    Parameters
    ----------
    func
        Callable to register.
    result : `type` or `None`
        Ignored; accepted only so the signature matches the other adapters.

    Returns
    -------
    func
        The very same object that was passed in.
    """
    return func
def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that turns a
    `UnaryColumnFunc` into a `UnaryFunc`.

    The returned callable requires its operand to be a
    `ScalarWhereClauseConverter`, applies ``func`` to the operand's column,
    and wraps the outcome in a new converter whose type is ``result``.
    """
    def applyToColumn(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        transformed = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(transformed, dtype=result)

    return applyToColumn
def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that turns a
    `BinaryColumnFunc` into a `BinaryFunc`.

    The returned callable requires both operands to be
    `ScalarWhereClauseConverter` instances, applies ``func`` to their
    columns, and wraps the outcome in a new converter whose type is
    ``result``.
    """
    def applyToColumns(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        combined = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(combined, dtype=result)

    return applyToColumns
class TimeBinaryOperator:
    """Binary operation on time-like operands, usable as a `BinaryFunc`.

    Before the wrapped operator is applied, the operands are passed through
    `coerceTimes` so that a mix of `Time` and `datetime` operands is compared
    as `datetime`.

    Parameters
    ----------
    operator : `Callable`
        Callable applied to the two (possibly coerced) column expressions.
    dtype : `type`
        Python type of the resulting expression.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        """Apply the operator to two scalar converters.

        Parameters
        ----------
        lhs : `WhereClauseConverter`
            Left-hand side operand; must be a `ScalarWhereClauseConverter`.
        rhs : `WhereClauseConverter`
            Right-hand side operand; must be a `ScalarWhereClauseConverter`.

        Returns
        -------
        expression : `ScalarWhereClauseConverter`
            Converter wrapping the operator result, with type ``self.dtype``.

        Raises
        ------
        ExpressionTypeError
            Raised if coercion to `datetime` is needed for an operand that is
            not a literal or bind value.
        """
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        operands = [arg.column for arg in self.coerceTimes(lhs, rhs)]
        return ScalarWhereClauseConverter.fromExpression(self.operator(*operands), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type if
        necessary.

        If any of the arguments has `datetime` type then all other arguments
        are converted to `datetime` type as well.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects, their type can be one of
            `Time` or `datetime`. If coercion happens, then `Time` objects can
            only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            List of converters in the same order as they appear in the
            argument list; some of them can be coerced to `datetime` type,
            non-coerced arguments are returned without any change.

        Raises
        ------
        ExpressionTypeError
            Raised if an argument that needs coercion is not a literal or
            bind value (for example a time column from the database).
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal.
            """
            if arg.dtype is datetime:
                return arg
            if arg.value is None:
                # This combination can be produced by a user expression, so
                # report it as an expression error rather than relying on an
                # assert that disappears under ``python -O``.
                raise ExpressionTypeError(
                    "Cannot coerce a time column or expression to datetime; "
                    "only time literals or bind values can be compared with datetime values."
                )
            assert arg.dtype is Time, "Cannot coerce non-Time literals"
            with warnings.catch_warnings():
                # Silence astropy/erfa warnings emitted during conversion.
                warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                if erfa is not None:
                    warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                dt = arg.value.to_datetime()
            return ScalarWhereClauseConverter.fromLiteral(dt)

        if any(arg.dtype is datetime for arg in args):
            return [_coerce(arg) for arg in args]
        return list(args)
class DispatchTable:
    """Holder of the unary- and binary-operator type-dispatch tables used for
    `WhereClauseConverter`.

    Notes
    -----
    Most of the machinery here (and in the function-signature type aliases
    defined before this class) exists so that the dispatch rules in `build`
    stay concise and easy to read, because that is where all of the important
    high-level logic lives.

    Double-dispatch is a pain in Python, as it is in most languages; the
    traditional visitor-pattern approach was tried here first, and it made
    the actual behavior *definitely* much harder to see.
    """

    def __init__(self) -> None:
        # Keyed by (operator, operand type).
        self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
        # Keyed by (operator, lhs type, rhs type).
        self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}

    def registerUnary(
        self,
        operator: str,
        operand: Union[type, Iterable[type]],
        func: _F,
        *,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a unary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language. Unary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyUnary`.
        operand : `type` or `Iterable` [ `type` ]
            Type or types for which ``func`` implements this operation.
        func : `Callable`
            Callable that implements the unary operation. If
            ``adapt is True``, this should be a `UnaryColumnFunc`. If
            ``adapt is False``, this should be a `UnaryFunc`. Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        result : `type`, optional
            Type of the expression returned by this operation. If not
            provided, the type of the operand is assumed.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and
            ``result`` (the second argument), returning a new callable with
            the signature of `UnaryFunc`. `True` (default) and `False`
            invoke a default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            wrap = adaptUnaryColumnFunc
        elif adapt is False:
            wrap = adaptIdentity
        else:
            wrap = adapt
        for operandType in ensure_iterable(operand):
            resultType = operandType if result is None else result
            self._unary[operator, operandType] = wrap(func, resultType)

    def registerBinary(
        self,
        operator: str,
        lhs: Union[type, Iterable[type]],
        func: _F,
        *,
        rhs: Optional[Union[type, Iterable[type]]] = None,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a binary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language. Binary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyBinary`.
        lhs : `type` or `Iterable` [ `type` ]
            Left-hand side type or types for which ``func`` implements this
            operation.
        func : `Callable`
            Callable that implements the binary operation. If
            ``adapt is True``, this should be a `BinaryColumnFunc`. If
            ``adapt is False``, this should be a `BinaryFunc`. Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        rhs : `type` or `Iterable` [ `type` ]
            Right-hand side type or types for which ``func`` implements this
            operation. If multiple types, all combinations of ``lhs`` and
            ``rhs`` are registered. If not provided, each element of ``lhs``
            is assumed to be paired with itself, but mixed-type combinations
            are not registered.
        result : `type`, optional
            Type of the expression returned by this operation. If not
            provided and ``rhs`` is also not provided, the type of the
            operand (``lhs``) is assumed. If not provided and ``rhs`` *is*
            provided, then ``result=None`` will be forwarded to ``adapt``.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and
            ``result`` (the second argument), returning a new callable with
            the signature of `BinaryFunc`. `True` (default) and `False`
            invoke a default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            wrap = adaptBinaryColumnFunc
        elif adapt is False:
            wrap = adaptIdentity
        else:
            wrap = adapt
        for lhsType in ensure_iterable(lhs):
            if rhs is None:
                # Same-type operation: the operand type doubles as the
                # default result type.
                resultType = lhsType if result is None else result
                self._binary[operator, lhsType, lhsType] = wrap(func, resultType)
                continue
            for rhsType in ensure_iterable(rhs):
                self._binary[operator, lhsType, rhsType] = wrap(func, result)

    def applyUnary(
        self,
        operator: str,
        operand: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Find and call the function registered for a unary operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        operand : `WhereClauseConverter`
            Operand, with ``operand.dtype`` and ``operator`` used to look up
            the appropriate function.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        handler = self._unary[operator, operand.dtype]
        return handler(operand)

    def applyBinary(
        self,
        operator: str,
        lhs: WhereClauseConverter,
        rhs: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Find and call the function registered for a binary operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        lhs : `WhereClauseConverter`
            Left-hand side operand.
        rhs : `WhereClauseConverter`
            Right-hand side operand.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        handler = self._binary[operator, lhs.dtype, rhs.dtype]
        return handler(lhs, rhs)

    @classmethod
    def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
        """Construct a table populated with all of the dispatch rules.

        Parameters
        ----------
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database; used when constructing `Timespan` literals from
            pairs.

        Returns
        -------
        table : `DispatchTable`
            New table with all rules registered.
        """
        table = DispatchTable()
        numericTypes = (int, float)
        orderedTypes = (int, float, str)
        timeTypes = (Time, datetime)
        nullComparable = (int, float, str, Time, type(None))
        noneOnly = (type(None),)
        comparisons = (
            ("=", operator.__eq__),
            ("!=", operator.__ne__),
            ("<", operator.__lt__),
            (">", operator.__gt__),
            ("<=", operator.__le__),
            (">=", operator.__ge__),
        )
        arithmetic = (
            ("+", operator.__add__),
            ("-", operator.__sub__),
            ("*", operator.__mul__),
            ("/", operator.__truediv__),
            ("%", operator.__mod__),
        )
        columnOps = sqlalchemy.sql.expression.ColumnOperators

        def boolean(expression: Any) -> ScalarWhereClauseConverter:
            # Shorthand for wrapping a boolean SQL expression.
            return ScalarWhereClauseConverter.fromExpression(expression, dtype=bool)

        # Standard scalar unary and binary operators: just delegate to
        # SQLAlchemy operators.
        table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
        table.registerUnary("+", numericTypes, operator.__pos__)
        table.registerUnary("-", numericTypes, operator.__neg__)
        table.registerBinary("AND", bool, sqlalchemy.sql.and_)
        table.registerBinary("OR", bool, sqlalchemy.sql.or_)
        for symbol, compare in comparisons:
            table.registerBinary(symbol, orderedTypes, compare, result=bool)
        for symbol, compute in arithmetic:
            table.registerBinary(symbol, numericTypes, compute)
        # Comparisons between any mix of Time and datetime operands.
        for symbol, compare in comparisons:
            table.registerBinary(symbol, timeTypes, TimeBinaryOperator(compare, bool),
                                 rhs=timeTypes, adapt=False)
        # Equality and inequality against None map to IS / IS NOT, with None
        # allowed on either side.
        for symbol, identityTest in (("=", columnOps.is_), ("!=", columnOps.is_not)):
            table.registerBinary(symbol, lhs=nullComparable, rhs=noneOnly, func=identityTest, result=bool)
            table.registerBinary(symbol, lhs=noneOnly, rhs=nullComparable, func=identityTest, result=bool)
        # Construct Timespan literals from 2-element tuples (A, B), where A and
        # B are each either Time or None.
        table.registerBinary(
            "PAIR",
            lhs=(Time, type(None)),
            rhs=(Time, type(None)),
            func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
            adapt=False,
        )
        # Less-than and greater-than between Timespans.
        table.registerBinary("<", lhs=Timespan, func=lambda a, b: boolean(a.timespan < b.timespan),
                             adapt=False)
        table.registerBinary(">", lhs=Timespan, func=lambda a, b: boolean(a.timespan > b.timespan),
                             adapt=False)
        # Less-than and greater-than between Timespans and Times.
        table.registerBinary("<", lhs=Timespan, rhs=Time,
                             func=lambda a, b: boolean(a.timespan < b.column), adapt=False)
        table.registerBinary(">", lhs=Timespan, rhs=Time,
                             func=lambda a, b: boolean(a.timespan > b.column), adapt=False)
        # With the Time on the left, the comparison is expressed from the
        # Timespan's point of view, so the operator is mirrored.
        table.registerBinary("<", lhs=Time, rhs=Timespan,
                             func=lambda a, b: boolean(b.timespan > a.column), adapt=False)
        table.registerBinary(">", lhs=Time, rhs=Timespan,
                             func=lambda a, b: boolean(b.timespan < a.column), adapt=False)
        # OVERLAPS operator between Timespans.
        table.registerBinary("OVERLAPS", lhs=Timespan, func=TimespanWhereClauseConverter.overlaps,
                             adapt=False)
        # OVERLAPS operator between Timespans and Time is equivalent to
        # "contains", but expression language only has OVERLAPS to keep it
        # simple.
        table.registerBinary("OVERLAPS", lhs=Timespan, rhs=Time,
                             func=TimespanWhereClauseConverter.contains, adapt=False)
        table.registerBinary("OVERLAPS", lhs=Time, rhs=Timespan,
                             func=lambda a, b: TimespanWhereClauseConverter.contains(b, a), adapt=False)
        return table
class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """
    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Operator dispatch table used by the unary/binary/tuple visitors.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so that
        # integer-looking literals are not turned into floats.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values first, then special constants, then
        # dimension element identifiers.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(column=self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                # Timespan columns are looked up in the query's timespan
                # columns rather than in the element's table.
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column given: the identifier refers to the dimension's key.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element),
                element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # KeyError from the dispatch lookup is an implementation detail;
            # report it as a type error without the chained traceback.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            raise ExpressionTypeError(
                f'Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary '
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(
                f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".'
            )
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # `stop` is inclusive.  Use the range object itself to get the
            # exact number of items; integer division of the span by the
            # stride under-counts (e.g. 0..10:5 has 3 items, not 2).
            items = range(start, stop + 1, stride)
            if len(literals) + len(items) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.column.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list
                literals += [sqlalchemy.sql.literal(item) for item in items]
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.
        if clauses:
            expr = sqlalchemy.sql.or_(*clauses)
        else:
            # Nothing to match against (no literals and only empty ranges):
            # membership in an empty set is always false.  Calling or_() with
            # no arguments is deprecated in SQLAlchemy and yields an empty
            # expression, so spell the constant out explicitly.
            expr = sqlalchemy.sql.expression.false()
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        #
        # Only 2-element tuples are recognized; they are dispatched through
        # the "PAIR" binary operation.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError context, consistent with
            # visitUnaryOp and visitBinaryOp.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")