Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "convertExpressionToSql",
26 "ExpressionTypeError",
27)
29from abc import ABC, abstractmethod
30import operator
31from typing import (
32 Any,
33 Callable,
34 Dict,
35 Iterable,
36 List,
37 Mapping,
38 Optional,
39 Tuple,
40 Type,
41 TYPE_CHECKING,
42 TypeVar,
43 Union,
44)
46from astropy.time import Time
47import sqlalchemy
48from sqlalchemy.ext.compiler import compiles
50from ....core import (
51 ddl,
52 Dimension,
53 DimensionElement,
54 DimensionUniverse,
55 NamedKeyMapping,
56 Timespan,
57 TimespanDatabaseRepresentation,
58)
59from ....core.utils import iterable
60from .parser import Node, TreeVisitor
61from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant
63if TYPE_CHECKING: 63 ↛ 64line 63 didn't jump to line 64, because the condition on line 63 was never true
64 from .._structs import QueryColumns
def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression into a SQLAlchemy expression.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in
        the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    # Walking the tree yields a converter for the root node; ``finish`` then
    # checks that the root is boolean and unwraps the SQLAlchemy expression.
    return tree.visit(
        WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    ).finish(tree)
class ExpressionTypeError(TypeError):
    """Error signalling that the operand types found in a query expression
    do not fit the operator (or other syntax) they are used with.
    """
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Wrapper `~sqlalchemy.sql.ColumnElement` for TIMESTAMP columns that
    appear in expressions.

    In expressions, TIMESTAMP columns are usually compared against time
    literals, i.e. `astropy.time.Time` instances that are converted to
    integer nanoseconds since Epoch. To make the comparison meaningful the
    TIMESTAMP column value has to be converted to that same representation.
    This class wraps the real column; the dialect-specific compilation
    functions registered for it (defined after this class) emit the SQL that
    performs the conversion.

    Only expressions in the WHERE clause go through this mechanism. TIMESTAMP
    values returned from queries are still handled by the standard mechanism
    and are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The TIMESTAMP column being wrapped.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement) -> None:
        super().__init__()
        # Read back by the ``@compiles`` functions when rendering SQL.
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the SQLite dialect.

    SQLite provides a ``strftime`` function that can convert a timestamp
    value to Unix seconds; the result is scaled by 10^9 in the emitted SQL.

    Parameters
    ----------
    element
        The `_TimestampColumnElement` being compiled.
    compiler
        SQLAlchemy compiler object (unused here).
    **kw
        Additional compilation options (unused here).

    Returns
    -------
    sql : `str`
        SQL text for the converted column.
    """
    # NOTE(review): only the bare column name is rendered, without any table
    # qualifier -- confirm that this cannot be ambiguous in generated queries.
    columnName = element._column.name
    return f"STRFTIME('%s', {columnName})*1000000000"
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the PostgreSQL dialect.

    PostgreSQL can use the ``EXTRACT(epoch FROM timestamp)`` function; the
    result is scaled by 10^9 in the emitted SQL.

    Parameters
    ----------
    element
        The `_TimestampColumnElement` being compiled.
    compiler
        SQLAlchemy compiler object (unused here).
    **kw
        Additional compilation options (unused here).

    Returns
    -------
    sql : `str`
        SQL text for the converted column.
    """
    # NOTE(review): only the bare column name is rendered, without any table
    # qualifier -- confirm that this cannot be ambiguous in generated queries.
    columnName = element._column.name
    return f"EXTRACT(epoch FROM {columnName})*1000000000"
class WhereClauseConverter(ABC):
    """Abstract interface for objects that turn a butler query expression
    tree into SQLAlchemy expression objects.

    Instances are created and consumed by `WhereClauseConverterVisitor`,
    which in turn is created and used only by the `convertExpressionToSql`
    function.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete the conversion of this [boolean] expression and return
        the SQLAlchemy expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.
            This base implementation always raises; subclasses that may
            actually represent a boolean should override it.
        """
        message = f'Expression "{node}" has type {self.dtype}, not bool.'
        raise ExpressionTypeError(message)

    @property
    @abstractmethod
    def dtype(self) -> type:
        """The Python type of the expression tree node associated with this
        converter (`type`).

        For a literal or bind object this should be its exact type; for
        expressions that map to database entities or expressions it should
        be the type produced by SQLAlchemy's converter mechanism when
        returning rows from the database.
        """
        raise NotImplementedError

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression as one element of the right-hand side of
        an IN expression.

        Implementations must do exactly one of the following:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            List of SQL expression objects that the left-hand side of the
            IN operation may match exactly.
        ranges : `list` of `tuple`
            List of (start, stop, step) tuples that represent ranges that
            the left-hand side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand for the IN expression.
            ``literals`` should only be appended to if
            ``self.dtype is dtype``, and ``ranges`` should only be appended
            to if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for
            use only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of
            an IN expression, or if it is incompatible with the left-hand
            side type.
        """
        raise NotImplementedError
class ScalarWhereClauseConverter(WhereClauseConverter):
    """Main `WhereClauseConverter` implementation, covering expressions that
    can always be represented directly by a single
    `sqlalchemy.sql.ColumnElement` instance.

    Instances should be obtained from `fromExpression` or `fromLiteral`
    rather than by calling the constructor.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None`
        if it was not constructed from a literal. Note that this is also
        `None` if this object corresponds to the literal `None`, in which
        case ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self._dtype = dtype
        self.value = value
        self.column = column

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of a known type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``.
        """
        # No literal is associated with a pre-built expression.
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``.
        """
        literalType = type(value)
        # Time literals get an explicit SQL type; for anything else the type
        # is left for SQLAlchemy to work out.
        sqlType = ddl.AstropyTimeNsecTai if literalType is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sqlType), value, literalType)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # Base class implementation always raises; delegating to it avoids
        # duplicating the error message.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f'{dtype.__name__}, but item has type {self.dtype.__name__}.'
        )
class TimespanWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column
        expression (which may or may not be backed by multiple real
        columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Build a timespan converter from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        endpoints = (begin, end)
        for endpoint in endpoints:
            assert endpoint.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A Time-typed operand that carries no Python value did not come from
        # a literal or bind value, so no Timespan literal can be built.
        if any(endpoint.value is None and endpoint.dtype is Time for endpoint in endpoints):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        literal = Timespan(begin.value, end.value)
        return cls(TimespanReprClass.fromLiteral(literal))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression for the overlap of this
        timespan with another timespan.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        overlapExpr = self.timespan.overlaps(other.timespan)
        return ScalarWhereClauseConverter.fromExpression(overlapExpr, bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression for whether this timespan
        contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the containment test; must have type
            `astropy.time.Time`.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean containment expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        containsExpr = self.timespan.contains(other.column)
        return ScalarWhereClauseConverter.fromExpression(containsExpr, bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        # Timespans can never be elements of an IN list.
        message = (
            f'Invalid element on right side of IN expression "{node}": '
            'Timespans are not allowed in this context.'
        )
        raise ExpressionTypeError(message)
class RangeWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for integer range literals.

    A range literal is only valid on the right-hand side of an IN operation
    whose left-hand side operand is of type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            ranges.append((self.start, self.stop, self.step))
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f'are only supported for int operands, not {dtype.__name__}.'
        )
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of the unary-operation callables that `DispatchTable` stores
directly.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of the binary-operation callables that `DispatchTable` stores
directly.
"""

UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature of unary-operation callables that operate directly on
SQLAlchemy column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Signature of binary-operation callables that operate directly on
SQLAlchemy column expressions.
"""

# Generic placeholder for "whatever callable type was passed in".
_F = TypeVar("_F")
def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    Parameters
    ----------
    func
        Callable to register.
    result : `type` or `None`
        Ignored; accepted so the signature matches the other adapters.

    Returns
    -------
    func
        The very same object that was passed in.
    """
    return func
def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that turns a
    `UnaryColumnFunc` into a `UnaryFunc`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable that operates on a SQLAlchemy column expression.
    result : `type`
        Python type assigned to the converter the adapted callable returns.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that requires its operand to be a
        `ScalarWhereClauseConverter`, applies ``func`` to the operand's
        column and wraps the outcome in a new `ScalarWhereClauseConverter`.
    """
    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        sqlExpr = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(sqlExpr, dtype=result)

    return adapted
def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that turns a
    `BinaryColumnFunc` into a `BinaryFunc`.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable that operates on two SQLAlchemy column expressions.
    result : `type`
        Python type assigned to the converter the adapted callable returns.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that requires both operands to be
        `ScalarWhereClauseConverter` instances, applies ``func`` to their
        columns and wraps the outcome in a new `ScalarWhereClauseConverter`.
    """
    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        sqlExpr = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(sqlExpr, dtype=result)

    return adapted
class DispatchTable:
    """An object that manages unary- and binary-operator type-dispatch tables
    for `WhereClauseConverter`.

    Notes
    -----
    A lot of the machinery in this class (and in the preceding function
    signature type aliases) is all in service of making the actual dispatch
    rules in the `build` method concise and easy to read, because that's where
    all of the important high-level logic lives.

    Double-dispatch is a pain in Python, as it is in most languages; it's worth
    noting that I first tried the traditional visitor-pattern approach here,
    and it was *definitely* much harder to see the actual behavior.
    """

    def __init__(self) -> None:
        # Keyed by (operator, operand type).
        self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
        # Keyed by (operator, lhs type, rhs type).
        self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}

    def registerUnary(
        self,
        operator: str,
        operand: Union[type, Iterable[type]],
        func: _F,
        *,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a unary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.  Unary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyUnary`.
        operand : `type` or `Iterable` [ `type` ]
            Type or types for which this operation is implemented by the given
            ``func``.
        func : `Callable`
            Callable that implements the unary operation.  If
            ``adapt is True``, this should be a `UnaryColumnFunc`.  If
            ``adapt is False``, this should be a `UnaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided, the type of the operand is assumed.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and ``result``
            (the second argument), returning a new callable with the
            signature of `UnaryFunc`.  `True` (default) and `False` invoke a
            default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptUnaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for item in iterable(operand):
            self._unary[operator, item] = adapt(func, result if result is not None else item)

    def registerBinary(
        self,
        operator: str,
        lhs: Union[type, Iterable[type]],
        func: _F,
        *,
        rhs: Optional[Union[type, Iterable[type]]] = None,
        result: Optional[type] = None,
        adapt: Any = True,
    ) -> None:
        """Register a binary operation for one or more types.

        Parameters
        ----------
        operator : `str`
            Operator as it appears in the string expression language.  Binary
            operations that are not mapped to operators may use their own
            arbitrary strings, as long as these are used consistently in
            `build` and `applyBinary`.
        lhs : `type` or `Iterable` [ `type` ]
            Left-hand side type or types for which this operation is
            implemented by the given ``func``.
        func : `Callable`
            Callable that implements the binary operation.  If
            ``adapt is True``, this should be a `BinaryColumnFunc`.  If
            ``adapt is False``, this should be a `BinaryFunc`.  Otherwise,
            this is whatever type is accepted as the first argument to
            ``adapt``.
        rhs : `type` or `Iterable` [ `type` ]
            Right-hand side type or types for which this operation is
            implemented by the given ``func``.  If multiple types, all
            combinations of ``lhs`` and ``rhs`` are registered.  If not
            provided, each element of ``lhs`` is assumed to be paired with
            itself, but mixed-type combinations are not registered.
        result : `type`, optional
            Type of the expression returned by this operation.  If not
            provided and ``rhs`` is also not provided, the type of the operand
            (``lhs``) is assumed.  If not provided and ``rhs`` *is* provided,
            then ``result=None`` will be forwarded to ``adapt``.
        adapt : `bool` or `Callable`
            A callable that wraps ``func`` (the first argument) and ``result``
            (the second argument), returning a new callable with the
            signature of `BinaryFunc`.  `True` (default) and `False` invoke a
            default adapter or no adapter (see ``func`` docs).
        """
        if adapt is True:
            adapt = adaptBinaryColumnFunc
        elif adapt is False:
            adapt = adaptIdentity
        for lh in iterable(lhs):
            if rhs is None:
                # Same-type pairing only; the result defaults to that type.
                self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh)
            else:
                for rh in iterable(rhs):
                    self._binary[operator, lh, rh] = adapt(func, result)

    def applyUnary(
        self,
        operator: str,
        operand: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered unary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        operand : `WhereClauseConverter`
            Operand, with ``operand.dtype`` and ``operator`` used to look up
            the appropriate function.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._unary[operator, operand.dtype](operand)

    def applyBinary(
        self,
        operator: str,
        lhs: WhereClauseConverter,
        rhs: WhereClauseConverter,
    ) -> WhereClauseConverter:
        """Look up and apply the appropriate function for a registered binary
        operation.

        Parameters
        ----------
        operator : `str`
            Operator for the operation to apply.
        lhs : `WhereClauseConverter`
            Left-hand side operand.
        rhs : `WhereClauseConverter`
            Right-hand side operand.

        Returns
        -------
        expression : `WhereClauseConverter`
            Converter instance that represents the operation, created by
            calling the registered function.

        Raises
        ------
        KeyError
            Raised if the operator and operand type combination is not
            recognized.
        """
        return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs)

    @classmethod
    def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
        """Construct a table populated with all of the dispatch rules of the
        expression language.

        Parameters
        ----------
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database; used by the "PAIR" rule to construct timespan
            literals.

        Returns
        -------
        table : `DispatchTable`
            New instance of ``cls`` with all rules registered.
        """
        # Instantiate ``cls`` rather than `DispatchTable` so that calling
        # ``build`` on a subclass yields an instance of that subclass.
        table = cls()
        # Standard scalar unary and binary operators: just delegate to
        # SQLAlchemy operators.
        table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
        table.registerUnary("+", (int, float), operator.__pos__)
        table.registerUnary("-", (int, float), operator.__neg__)
        table.registerBinary("AND", bool, sqlalchemy.sql.and_)
        table.registerBinary("OR", bool, sqlalchemy.sql.or_)
        # Comparisons are defined between operands of the same type only and
        # always produce a boolean.
        comparisons = (
            ("=", operator.__eq__),
            ("!=", operator.__ne__),
            ("<", operator.__lt__),
            (">", operator.__gt__),
            ("<=", operator.__le__),
            (">=", operator.__ge__),
        )
        for symbol, columnFunc in comparisons:
            table.registerBinary(symbol, (int, float, str, Time), columnFunc, result=bool)
        # Arithmetic is defined between operands of the same numeric type and
        # produces that type.
        arithmetic = (
            ("+", operator.__add__),
            ("-", operator.__sub__),
            ("*", operator.__mul__),
            ("/", operator.__truediv__),
            ("%", operator.__mod__),
        )
        for symbol, columnFunc in arithmetic:
            table.registerBinary(symbol, (int, float), columnFunc)
        # Construct Timespan literals from 2-element tuples (A, B), where A and
        # B are each either Time or None.
        table.registerBinary(
            "PAIR",
            lhs=(Time, type(None)),
            rhs=(Time, type(None)),
            func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
            adapt=False,
        )
        # Less-than and greater-than between Timespans.
        table.registerBinary(
            "<",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool),
            adapt=False,
        )
        # Less-than and greater-than between Timespans and Times.
        table.registerBinary(
            "<",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Timespan,
            rhs=Time,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool),
            adapt=False,
        )
        # With the Time on the left the operands are swapped, so the
        # comparison applied to the timespan is mirrored.
        table.registerBinary(
            "<",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool),
            adapt=False,
        )
        table.registerBinary(
            ">",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool),
            adapt=False,
        )
        # OVERLAPS operator between Timespans.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            func=TimespanWhereClauseConverter.overlaps,
            adapt=False,
        )
        # OVERLAPS operator between Timespans and Time is equivalent to
        # "contains", but expression language only has OVERLAPS to keep it
        # simple.
        table.registerBinary(
            "OVERLAPS",
            lhs=Timespan,
            rhs=Time,
            func=TimespanWhereClauseConverter.contains,
            adapt=False,
        )
        table.registerBinary(
            "OVERLAPS",
            lhs=Time,
            rhs=Timespan,
            func=lambda a, b: TimespanWhereClauseConverter.contains(b, a),
            adapt=False,
        )
        return table
813class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
814 """Implements TreeVisitor to convert the tree into
815 `WhereClauseConverter` objects.
817 This class should be used only by the `convertExpressionToSql` function;
818 external code should just call that function.
820 Parameters
821 ----------
822 universe : `DimensionUniverse`
823 All known dimensions.
824 columns: `QueryColumns`
825 Struct that organizes the special columns known to the query
826 under construction.
827 elements: `NamedKeyMapping`
828 `DimensionElement` instances and their associated tables.
829 bind: `Mapping`
830 Mapping from string names to literal values that should be substituted
831 for those names when they appear (as identifiers) in the expression.
832 TimespanReprClass: `type`; subclass of `TimespanDatabaseRepresentation`
833 Class that encapsulates the representation of `Timespan` objects in
834 the database.
835 """
def __init__(
    self,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
):
    # Operator dispatch rules are built once per visitor, using the given
    # timespan representation.
    self._dispatch = DispatchTable.build(TimespanReprClass)
    self._TimespanReprClass = TimespanReprClass
    # Inputs are stored as given; they are consulted while visiting
    # identifiers.
    self.bind = bind
    self.elements = elements
    self.columns = columns
    self.universe = universe
def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitNumericLiteral
    # The literal arrives as text: try an integer first and fall back to a
    # float when that fails.
    try:
        number: Union[int, float] = int(value)
    except ValueError:
        number = float(value)
    return ScalarWhereClauseConverter.fromLiteral(number)
def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitStringLiteral
    # String literals are wrapped unchanged.
    converter = ScalarWhereClauseConverter.fromLiteral(value)
    return converter
def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitTimeLiteral
    # Time literals are wrapped unchanged.
    converter = ScalarWhereClauseConverter.fromLiteral(value)
    return converter
def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitIdentifier

    # 1. Names present in the bind mapping are replaced by their values.
    if name in self.bind:
        bound = self.bind[name]
        if isinstance(bound, Timespan):
            return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(bound))
        return ScalarWhereClauseConverter.fromLiteral(bound)

    # 2. Special constants recognized by the expression language.
    constant = categorizeConstant(name)
    if constant is ExpressionConstant.INGEST_DATE:
        assert self.columns.datasets is not None
        assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
        # Wrap the column so that it is compiled into the representation
        # used for time literals.
        ingestDate = _TimestampColumnElement(self.columns.datasets.ingestDate)
        return ScalarWhereClauseConverter.fromExpression(ingestDate, Time)
    if constant is ExpressionConstant.NULL:
        return ScalarWhereClauseConverter.fromLiteral(None)
    assert constant is None, "Check for enum values should be exhaustive."

    # 3. Anything else must identify a dimension element, optionally with
    #    a column of that element.
    element, column = categorizeElementId(self.universe, name)
    if column is None:
        # Bare element name: use the dimension's key column.
        assert isinstance(element, Dimension)
        return ScalarWhereClauseConverter.fromExpression(
            self.columns.getKeyColumn(element),
            element.primaryKey.getPythonType()
        )
    if column == TimespanDatabaseRepresentation.NAME:
        if element.temporal is None:
            raise ExpressionTypeError(
                f"No timespan column exists for non-temporal element '{element.name}'."
            )
        return TimespanWhereClauseConverter(self.columns.timespans[element])
    standardFields = element.RecordClass.fields.standard
    if column not in standardFields.names:
        raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
    return ScalarWhereClauseConverter.fromExpression(
        self.elements[element].columns[column],
        standardFields[column].getPythonType(),
    )
def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitUnaryOp
    try:
        converted = self._dispatch.applyUnary(operator, operand)
    except KeyError:
        # No rule registered for this operator/type combination; report it
        # as a type error and suppress the KeyError context.
        message = f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
        raise ExpressionTypeError(message) from None
    return converted
def visitBinaryOp(
    self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitBinaryOp
    try:
        converted = self._dispatch.applyBinary(operator, lhs, rhs)
    except KeyError:
        # A KeyError from the dispatcher is reported to the caller as an
        # expression type error; ``from None`` hides the KeyError itself.
        message = (
            f'Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary '
            f'operator {operator} in "{node}".'
        )
        raise ExpressionTypeError(message) from None
    return converted
def visitIsIn(
    self,
    lhs: WhereClauseConverter,
    values: List[WhereClauseConverter],
    not_in: bool,
    node: Node,
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitIsIn
    #
    # Only a scalar expression is accepted on the left-hand side of IN.
    if not isinstance(lhs, ScalarWhereClauseConverter):
        raise ExpressionTypeError(
            f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".'
        )
    # `values` is a list of literals and ranges, range is represented
    # by a tuple (start, stop, stride). We need to transform range into
    # some SQL construct, simplest would be to generate a set of literals
    # and add it to the same list but it could become too long. What we
    # do here is to introduce some large limit on the total number of
    # items in IN() and if range exceeds that limit then we do something
    # like:
    #
    #    X IN (1, 2, 3)
    #    OR
    #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
    #
    # or for NOT IN case
    #
    #    NOT (X IN (1, 2, 3)
    #         OR
    #         (X BETWEEN START AND STOP
    #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
    #
    max_in_items = 1000
    clauses: List[sqlalchemy.sql.ColumnElement] = []
    # Split the list into literals and ranges
    literals: List[sqlalchemy.sql.ColumnElement] = []
    ranges: List[Tuple[int, int, int]] = []
    for value in values:
        value.categorizeForIn(literals, ranges, lhs.dtype, node)
    # Handle ranges (maybe by converting them to literals).
    for start, stop, stride in ranges:
        # Exact number of values in the inclusive range start..stop with
        # the given stride, e.g. 1..10:3 -> (1, 4, 7, 10) -> 4.  A range
        # with stop < start has no members.
        count = (stop - start) // stride + 1 if stop >= start else 0
        if len(literals) + count > max_in_items:
            # X BETWEEN START AND STOP
            #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
            expr = lhs.column.between(start, stop)
            if stride != 1:
                expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
            clauses.append(expr)
        else:
            # add all values to literal list, stop is inclusive
            literals.extend(
                sqlalchemy.sql.literal(member) for member in range(start, stop + 1, stride)
            )
    # Handle literals.
    if literals:
        # add IN() in front of BETWEENs
        clauses.insert(0, lhs.column.in_(literals))
    # Assemble the full expression.  When nothing was collected (e.g. the
    # only value was an empty range) membership can never hold, so use an
    # explicit FALSE instead of calling or_() without arguments, which
    # SQLAlchemy deprecates.
    if clauses:
        expr = sqlalchemy.sql.or_(*clauses)
    else:
        expr = sqlalchemy.sql.false()
    if not_in:
        expr = sqlalchemy.sql.not_(expr)
    return ScalarWhereClauseConverter.fromExpression(expr, bool)
def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitParens
    #
    # The converter built for the parenthesized expression is handed back
    # unchanged.
    return expression
def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
    # Docstring inherited from base class
    #
    # Only two-element tuples are accepted; the pair is combined through
    # the dispatcher's "PAIR" binary operation.
    if len(items) != 2:
        raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
    first, second = items
    try:
        return self._dispatch.applyBinary("PAIR", first, second)
    except KeyError:
        # Suppress the KeyError context so that only the type error is
        # shown, consistent with visitUnaryOp and visitBinaryOp.
        raise ExpressionTypeError(
            f'Invalid type(s) ({first.dtype}, {second.dtype}) in timespan tuple "{node}" '
            '(Note that date/time strings must be preceded by "T" to be recognized).'
        ) from None
def visitRangeLiteral(
    self, start: int, stop: int, stride: Optional[int], node: Node
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitRangeLiteral
    #
    # A stride that is None (or otherwise falsy) is replaced by 1.
    step = stride if stride else 1
    return RangeWhereClauseConverter(start, stop, step)
def visitPointNode(
    self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
) -> WhereClauseConverter:
    # Docstring inherited from base class
    #
    # Placeholder for a future extension: the syntax is enabled, but
    # actual use is not supported yet, so this always raises.
    message = "POINT() function is not supported yet"
    raise NotImplementedError(message)