Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py: 27%
338 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-15 00:10 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-15 00:10 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "convertExpressionToSql",
26 "ExpressionTypeError",
27)
29import operator
30import warnings
31from abc import ABC, abstractmethod
32from collections.abc import Set
33from datetime import datetime
34from typing import (
35 TYPE_CHECKING,
36 Any,
37 Callable,
38 Dict,
39 Iterable,
40 List,
41 Mapping,
42 Optional,
43 Tuple,
44 Type,
45 TypeVar,
46 Union,
47)
49import astropy.utils.exceptions
50import sqlalchemy
51from astropy.time import Time
52from lsst.utils.iteration import ensure_iterable
53from sqlalchemy.ext.compiler import compiles
54from sqlalchemy.sql.expression import func
55from sqlalchemy.sql.visitors import InternalTraversal
57from ....core import (
58 Dimension,
59 DimensionElement,
60 DimensionUniverse,
61 NamedKeyMapping,
62 Timespan,
63 TimespanDatabaseRepresentation,
64 ddl,
65)
66from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId
67from .parser import Node, TreeVisitor
69# As of astropy 4.2, the erfa interface is shipped independently and
70# ErfaWarning is no longer an AstropyWarning
71try:
72 import erfa
73except ImportError:
74 erfa = None
76if TYPE_CHECKING: 76 ↛ 77line 76 didn't jump to line 77, because the condition on line 76 was never true
77 from .._structs import QueryColumns
def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression tree into a SQLAlchemy expression.

    Parameters
    ----------
    tree : `Node`
        Root of the parsed query expression.
    universe : `DimensionUniverse`
        Definitions of all known dimensions.
    columns : `QueryColumns`
        Struct organizing the special columns known to the query that is
        being built.
    elements : `NamedKeyMapping`
        Maps `DimensionElement` instances to the tables associated with them.
    bind : `Mapping`
        Maps identifier names to the literal values that should replace them
        wherever those names appear in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class describing how `Timespan` objects are represented in the
        database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        Boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if an operation's operands are incompatible with its operator,
        or if the expression as a whole is not boolean.
    """
    # The visitor turns every tree node into a converter object; the converter
    # produced for the root is then asked for the final boolean expression.
    visitor = WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    return tree.visit(visitor).finish(tree)
class ExpressionTypeError(TypeError):
    """Error for query expressions whose types are inconsistent.

    Raised when the types appearing in a query expression do not fit the
    operators or other syntax they are used with.
    """
class _TimestampLiteral(sqlalchemy.sql.ColumnElement):
    """Column element representing a TIMESTAMP literal in an expression.

    SQLite keeps timestamps as strings, and comparing those strings directly
    can sometimes cause problems; wrapping them in DATETIME() makes the
    comparison more reliable. PostgreSQL works better when string literals
    are prefixed with TIMESTAMP. The dialect-specific rendering is supplied
    by the compilation hooks registered for this class.

    Parameters
    ----------
    literal : `datetime.datetime`
        The timestamp value to embed in the expression.
    """

    inherit_cache = True
    # The wrapped value is declared to SQLAlchemy's traversal machinery as a
    # plain Python object.
    _traverse_internals = [
        ("_literal", InternalTraversal.dp_plain_obj),
    ]

    def __init__(self, literal: datetime):
        super().__init__()
        self._literal = literal
@compiles(_TimestampLiteral, "sqlite")
def compile_timestamp_literal_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a TIMESTAMP literal for SQLite.

    The literal is wrapped in SQLite's ``datetime`` function, which renders
    a time value in the normalized ``YYYY-MM-DD HH:MM:SS`` text form.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled; its ``_literal`` attribute holds the value.
    compiler
        SQLAlchemy compiler object used to render the wrapped expression.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Result of ``compiler.process`` for the wrapped expression.
    """
    return compiler.process(func.datetime(sqlalchemy.sql.literal(element._literal)), **kw)
@compiles(_TimestampLiteral, "postgresql")
def compile_timestamp_literal_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a TIMESTAMP literal for PostgreSQL.

    For PostgreSQL it works better if we add TIMESTAMP to string literals,
    so the value is formatted as an ISO string (space separator, microsecond
    precision) and the rendered literal is prefixed with ``TIMESTAMP``.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled; its ``_literal`` attribute holds the value.
    compiler
        SQLAlchemy compiler object used to render the string literal.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        ``"TIMESTAMP "`` followed by the rendered string literal.
    """
    literal = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(literal), **kw)
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Column element wrapping a TIMESTAMP column or column expression.

    SQLite keeps timestamps as strings, and comparing those strings directly
    can sometimes cause problems; wrapping the column in DATETIME() makes the
    comparison more reliable.

    Only expressions in the WHERE clause use this wrapper. TIMESTAMP values
    returned from queries still go through the standard mechanism and are
    converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The column or column expression to wrap.
    """

    inherit_cache = True
    # The wrapped column is declared to SQLAlchemy's traversal machinery as a
    # nested clause element.
    _traverse_internals = [
        ("_column", InternalTraversal.dp_clauseelement),
    ]

    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a TIMESTAMP column for SQLite.

    The column is wrapped in SQLite's ``datetime`` function, which renders
    a time value in the normalized ``YYYY-MM-DD HH:MM:SS`` text form.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; its ``_column`` attribute holds the wrapped
        column expression.
    compiler
        SQLAlchemy compiler object used to render the wrapped expression.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Result of ``compiler.process`` for the wrapped expression.
    """
    return compiler.process(func.datetime(element._column), **kw)
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a TIMESTAMP column for PostgreSQL.

    No wrapper is added for this dialect: the wrapped column expression is
    compiled as-is.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; its ``_column`` attribute holds the wrapped
        column expression.
    compiler
        SQLAlchemy compiler object used to render the column.
    **kw
        Additional keyword arguments, forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Result of ``compiler.process`` for the wrapped column.
    """
    return compiler.process(element._column, **kw)
class WhereClauseConverter(ABC):
    """Base class for objects that turn a butler query expression tree into
    SQLAlchemy expression objects.

    Instances are created and consumed by `WhereClauseConverterVisitor`,
    which itself is created and used only by `convertExpressionToSql`.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete conversion of this [boolean] expression into a SQLAlchemy
        expression object.

        Parameters
        ----------
        node : `Node`
            Expression tree node this converter stands for; only used when
            reporting errors.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            Boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node is not a boolean expression. This base
            implementation raises unconditionally; subclasses that can
            represent a boolean are expected to override it.
        """
        message = f'Expression "{node}" has type {self.dtype}, not bool.'
        raise ExpressionTypeError(message)

    @property
    @abstractmethod
    def dtype(self) -> type:
        """Python type of the expression tree node tied to this converter
        (`type`).

        For a literal or bind object this should be its exact type. For
        expressions that map to database entities or expressions it should be
        the type SQLAlchemy's converter mechanism produces when returning
        rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression as one element of the right-hand side of an
        IN expression.

        An implementation must do one of the following:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            (start, stop, step) tuples describing ranges that the left-hand
            side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand of the IN expression.
            ``literals`` should be appended to only if
            ``self.dtype is dtype``, and ``ranges`` only if ``dtype is int``.
        node : `Node`
            Expression tree node this converter stands for; only used when
            reporting errors.

        Raises
        ------
        ExpressionTypeError
            Raised if this node may never appear on the right-hand side of an
            IN expression, or if it does not fit the left-hand side type.
        """
        raise NotImplementedError()
class ScalarWhereClauseConverter(WhereClauseConverter):
    """Main `WhereClauseConverter` implementation, covering expressions that
    a single `sqlalchemy.sql.ColumnElement` instance can always represent.

    Use `fromExpression` or `fromLiteral` to construct instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        Python literal the expression was built from, or `None` if it was
        not built from a literal. It is also `None` when the object
        corresponds to the literal `None`, in which case
        ``dtype is type(None)``.
    dtype : `type`
        Python type the expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self.column = column
        self.value = value
        self._dtype = dtype

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of a given type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type the expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter wrapping ``column``; its ``value`` is `None`.
        """
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter wrapping ``value``, with ``dtype`` set to
            ``type(value)``.
        """
        dtype = type(value)
        if dtype is datetime:
            # datetime values get the dedicated timestamp-literal element.
            return cls(_TimestampLiteral(value), value, dtype)
        # Time literals are given an explicit column type; for everything
        # else the type is left as `None`.
        sql_type = ddl.AstropyTimeNsecTai if dtype is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sql_type), value, dtype)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # The base class raises; delegating avoids duplicating the message.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f"{dtype.__name__}, but item has type {self.dtype.__name__}."
        )
class SequenceWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverter, for expressions that represent
    a sequence of `sqlalchemy.sql.ColumnElement` instances.

    This converter is intended for bound identifiers whose bind value is a
    sequence (but not string), which should only appear in the right hand side
    of ``IN`` operator. It should be constructed by calling the `fromLiteral`
    method.

    Parameters
    ----------
    scalars : `list` [ `ScalarWhereClauseConverter` ]
        Converters for items in the sequence.
    """

    def __init__(self, scalars: List[ScalarWhereClauseConverter]):
        self.scalars = scalars

    @classmethod
    def fromLiteral(cls, values: Iterable[Any]) -> SequenceWhereClauseConverter:
        """Construct from an iterable of Python literals.

        Parameters
        ----------
        values : `Iterable`
            The Python literals to wrap; each one is converted with
            `ScalarWhereClauseConverter.fromLiteral`.

        Returns
        -------
        converter : `SequenceWhereClauseConverter`
            Converter instance that wraps ``values``.
        """
        return cls([ScalarWhereClauseConverter.fromLiteral(value) for value in values])

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return list

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        # Each item is categorized on its own, so an item whose type does not
        # match ``dtype`` raises from the scalar converter.
        for scalar in self.scalars:
            scalar.categorizeForIn(literals, ranges, dtype, node)
class TimespanWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverter for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column expression
        (which may or may not be backed by multiple real columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Construct from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects in
            the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or other
            time expression, not a literal or bind time value.
        """
        assert begin.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        assert end.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A converter of type Time with no stored value was not built from a
        # literal, so there is no Python value to build a Timespan from.
        if (begin.value is None and begin.dtype is Time) or (end.value is None and end.dtype is Time):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        return cls(TimespanReprClass.fromLiteral(Timespan(begin.value, end.value)))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents the overlap
        of this timespan with another.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.overlaps(other.timespan), bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents whether
        this timespan contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the contains operation; must have type `Time`.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean contains expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.contains(other.column), bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            "Timespans are not allowed in this context."
        )
class RangeWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for integer range literals.

    A range literal is only valid on the right-hand side of an IN operation
    whose left-hand side operand has type `int`.

    Parameters
    ----------
    start : `int`
        First value of the range (inclusive).
    stop : `int`
        End of the range (exclusive).
    step : `int`
        Step between consecutive values.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            ranges.append((self.start, self.stop, self.step))
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f"are only supported for int operands, not {dtype.__name__}."
        )
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Type of the unary-operation callables that `DispatchTable` stores
directly: one converter in, one converter out.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Type of the binary-operation callables that `DispatchTable` stores
directly: two converters in, one converter out.
"""

UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Type of unary-operation callables that operate directly on SQLAlchemy
column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Type of binary-operation callables that operate directly on SQLAlchemy
column expressions.
"""

# Generic placeholder for "whatever callable was passed in".
_F = TypeVar("_F")
def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    Parameters
    ----------
    func
        Callable to register.
    result : `type` or `None`
        Ignored; accepted so the signature matches the other adapters.

    Returns
    -------
    func
        The very same object that was passed in.
    """
    return func
def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that turns a
    `UnaryColumnFunc` into a `UnaryFunc`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable that operates on a SQLAlchemy column expression.
    result : `type`
        Type assigned to the converter returned by the adapted callable.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that requires its operand to be a
        `ScalarWhereClauseConverter`, applies ``func`` to the operand's
        column, and wraps the outcome in a new `ScalarWhereClauseConverter`.
    """

    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        column = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted
def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that turns a
    `BinaryColumnFunc` into a `BinaryFunc`.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable that operates on two SQLAlchemy column expressions.
    result : `type`
        Type assigned to the converter returned by the adapted callable.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that requires both operands to be
        `ScalarWhereClauseConverter` instances, applies ``func`` to their
        columns, and wraps the outcome in a new `ScalarWhereClauseConverter`.
    """

    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        column = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted
class TimeBinaryOperator:
    """Callable implementing a binary operation between time operands.

    Parameters
    ----------
    operator : `Callable`
        Function applied to the two operand column expressions.
    dtype : `type`
        Type assigned to the converter returned by the operation.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        # Bring both operands to a common time type before combining them.
        coerced = self.coerceTimes(lhs, rhs)
        columns = [converter.column for converter in coerced]
        return ScalarWhereClauseConverter.fromExpression(self.operator(*columns), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce ScalarWhereClauseConverters to `datetime` type where needed.

        When at least one argument has `datetime` type, every other argument
        is converted to `datetime` type too.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Converters representing time objects, of type `Time` or
            `datetime`. When coercion takes place, `Time` objects may only be
            literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            Converters in the order given. Coerced ones are replaced by
            `datetime` converters; the rest are returned unchanged.
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal."""
            if arg.dtype is datetime:
                return arg
            assert arg.value is not None, "Cannot coerce non-literals"
            assert arg.dtype is Time, "Cannot coerce non-Time literals"
            with warnings.catch_warnings():
                # Silence astropy (and, when importable, erfa) warnings
                # emitted during the conversion.
                warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                if erfa is not None:
                    warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                dt = arg.value.to_datetime()
            return ScalarWhereClauseConverter.fromLiteral(dt)

        if not any(arg.dtype is datetime for arg in args):
            # No datetime operand present: nothing needs coercing.
            return list(args)
        return [_coerce(arg) for arg in args]
694class DispatchTable:
695 """An object that manages unary- and binary-operator type-dispatch tables
696 for `WhereClauseConverter`.
698 Notes
699 -----
700 A lot of the machinery in this class (and in the preceding function
701 signature type aliases) is all in service of making the actual dispatch
702 rules in the `build` method concise and easy to read, because that's where
703 all of the important high-level logic lives.
705 Double-dispatch is a pain in Python, as it is in most languages; it's worth
706 noting that I first tried the traditional visitor-pattern approach here,
707 and it was *definitely* much harder to see the actual behavior.
708 """
710 def __init__(self) -> None:
711 self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
712 self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}
714 def registerUnary(
715 self,
716 operator: str,
717 operand: Union[type, Iterable[type]],
718 func: _F,
719 *,
720 result: Optional[type] = None,
721 adapt: Any = True,
722 ) -> None:
723 """Register a unary operation for one or more types.
725 Parameters
726 ----------
727 operator : `str`
728 Operator as it appears in the string expression language. Unary
729 operations that are not mapped to operators may use their own
730 arbitrary strings, as long as these are used consistently in
731 `build` and `applyUnary`.
732 operand : `type` or `Iterable` [ `type` ]
733 Type or types for which this operation is implemented by the given
734 ``func``.
735 func : `Callable`
736 Callable that implements the unary operation. If
737 ``adapt is True``, this should be a `UnaryColumnFunc`. If
738 ``adapt is False``, this should be a `UnaryFunc`. Otherwise,
739 this is whatever type is accepted as the first argument to
740 ``adapt``.
741 result : `type`, optional
742 Type of the expression returned by this operation. If not
743 provided, the type of the operand is assumed.
744 adapt : `bool` or `Callable`
745 A callable that wraps ``func`` (the first argument) and ``result``
746 (the second argument), returning a new callable with the
747 signature of `UnaryFunc`. `True` (default) and `False` invoke a
748 default adapter or no adapter (see ``func`` docs).
749 """
750 if adapt is True:
751 adapt = adaptUnaryColumnFunc
752 elif adapt is False:
753 adapt = adaptIdentity
754 for item in ensure_iterable(operand):
755 self._unary[operator, item] = adapt(func, result if result is not None else item)
757 def registerBinary(
758 self,
759 operator: str,
760 lhs: Union[type, Iterable[type]],
761 func: _F,
762 *,
763 rhs: Optional[Union[type, Iterable[type]]] = None,
764 result: Optional[type] = None,
765 adapt: Any = True,
766 ) -> None:
767 """Register a binary operation for one or more types.
769 Parameters
770 ----------
771 operator : `str`
772 Operator as it appears in the string expression language. Binary
773 operations that are not mapped to operators may use their own
774 arbitrary strings, as long as these are used consistently in
775 `build` and `applyBinary`.
776 lhs : `type` or `Iterable` [ `type` ]
777 Left-hand side type or types for which this operation is
778 implemented by the given ``func``.
779 func : `Callable`
780 Callable that implements the binary operation. If
781 ``adapt is True``, this should be a `BinaryColumnFunc`. If
782 ``adapt is False``, this should be a `BinaryFunc`. Otherwise,
783 this is whatever type is accepted as the first argument to
784 ``adapt``.
785 rhs : `type` or `Iterable` [ `type` ]
786 Right-hand side type or types for which this operation is
787 implemented by the given ``func``. If multiple types, all
788 combinations of ``lhs`` and ``rhs`` are registered. If not
789 provided, each element of ``lhs`` is assumed to be paired with
790 itself, but mixed-type combinations are not registered.
791 result : `type`, optional
792 Type of the expression returned by this operation. If not
793 provided and ``rhs`` is also not provided, the type of the operand
794 (``lhs``) is assumed. If not provided and ``rhs`` *is* provided,
795 then ``result=None`` will be forwarded to ``adapt``.
796 adapt : `bool` or `Callable`
797 A callable that wraps ``func`` (the first argument) and ``result``
798 (the second argument), returning a new callable with the
799 signature of `BinaryFunc`. `True` (default) and `False` invoke a
800 default adapter or no adapter (see ``func`` docs).
801 """
802 if adapt is True:
803 adapt = adaptBinaryColumnFunc
804 elif adapt is False:
805 adapt = adaptIdentity
806 for lh in ensure_iterable(lhs):
807 if rhs is None:
808 self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh)
809 else:
810 for rh in ensure_iterable(rhs):
811 self._binary[operator, lh, rh] = adapt(func, result)
813 def applyUnary(
814 self,
815 operator: str,
816 operand: WhereClauseConverter,
817 ) -> WhereClauseConverter:
818 """Look up and apply the appropriate function for a registered unary
819 operation.
821 Parameters
822 ----------
823 operator : `str`
824 Operator for the operation to apply.
825 operand : `WhereClauseConverter`
826 Operand, with ``operand.dtype`` and ``operator`` used to look up
827 the appropriate function.
829 Returns
830 -------
831 expression : `WhereClauseConverter`
832 Converter instance that represents the operation, created by
833 calling the registered function.
835 Raises
836 ------
837 KeyError
838 Raised if the operator and operand type combination is not
839 recognized.
840 """
841 return self._unary[operator, operand.dtype](operand)
843 def applyBinary(
844 self,
845 operator: str,
846 lhs: WhereClauseConverter,
847 rhs: WhereClauseConverter,
848 ) -> WhereClauseConverter:
849 """Look up and apply the appropriate function for a registered binary
850 operation.
852 Parameters
853 ----------
854 operator : `str`
855 Operator for the operation to apply.
856 lhs : `WhereClauseConverter`
857 Left-hand side operand.
858 rhs : `WhereClauseConverter`
859 Right-hand side operand.
861 Returns
862 -------
863 expression : `WhereClauseConverter`
864 Converter instance that represents the operation, created by
865 calling the registered function.
867 Raises
868 ------
869 KeyError
870 Raised if the operator and operand type combination is not
871 recognized.
872 """
873 return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs)
875 @classmethod
876 def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
877 table = DispatchTable()
878 # Standard scalar unary and binary operators: just delegate to
879 # SQLAlchemy operators.
880 table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
881 table.registerUnary("+", (int, float), operator.__pos__)
882 table.registerUnary("-", (int, float), operator.__neg__)
883 table.registerBinary("AND", bool, sqlalchemy.sql.and_)
884 table.registerBinary("OR", bool, sqlalchemy.sql.or_)
885 table.registerBinary("=", (int, float, str), operator.__eq__, result=bool)
886 table.registerBinary("!=", (int, float, str), operator.__ne__, result=bool)
887 table.registerBinary("<", (int, float, str), operator.__lt__, result=bool)
888 table.registerBinary(">", (int, float, str), operator.__gt__, result=bool)
889 table.registerBinary("<=", (int, float, str), operator.__le__, result=bool)
890 table.registerBinary(">=", (int, float, str), operator.__ge__, result=bool)
891 table.registerBinary("+", (int, float), operator.__add__)
892 table.registerBinary("-", (int, float), operator.__sub__)
893 table.registerBinary("*", (int, float), operator.__mul__)
894 table.registerBinary("/", (int, float), operator.__truediv__)
895 table.registerBinary("%", (int, float), operator.__mod__)
896 table.registerBinary(
897 "=",
898 (Time, datetime),
899 TimeBinaryOperator(operator.__eq__, bool),
900 rhs=(Time, datetime),
901 adapt=False,
902 )
903 table.registerBinary(
904 "!=",
905 (Time, datetime),
906 TimeBinaryOperator(operator.__ne__, bool),
907 rhs=(Time, datetime),
908 adapt=False,
909 )
910 table.registerBinary(
911 "<",
912 (Time, datetime),
913 TimeBinaryOperator(operator.__lt__, bool),
914 rhs=(Time, datetime),
915 adapt=False,
916 )
917 table.registerBinary(
918 ">",
919 (Time, datetime),
920 TimeBinaryOperator(operator.__gt__, bool),
921 rhs=(Time, datetime),
922 adapt=False,
923 )
924 table.registerBinary(
925 "<=",
926 (Time, datetime),
927 TimeBinaryOperator(operator.__le__, bool),
928 rhs=(Time, datetime),
929 adapt=False,
930 )
931 table.registerBinary(
932 ">=",
933 (Time, datetime),
934 TimeBinaryOperator(operator.__ge__, bool),
935 rhs=(Time, datetime),
936 adapt=False,
937 )
938 table.registerBinary(
939 "=",
940 lhs=(int, float, str, Time, type(None)),
941 rhs=(type(None),),
942 func=sqlalchemy.sql.expression.ColumnOperators.is_,
943 result=bool,
944 )
945 table.registerBinary(
946 "=",
947 lhs=(type(None),),
948 rhs=(int, float, str, Time, type(None)),
949 func=sqlalchemy.sql.expression.ColumnOperators.is_,
950 result=bool,
951 )
952 table.registerBinary(
953 "!=",
954 lhs=(int, float, str, Time, type(None)),
955 rhs=(type(None),),
956 func=sqlalchemy.sql.expression.ColumnOperators.is_not,
957 result=bool,
958 )
959 table.registerBinary(
960 "!=",
961 lhs=(type(None),),
962 rhs=(int, float, str, Time, type(None)),
963 func=sqlalchemy.sql.expression.ColumnOperators.is_not,
964 result=bool,
965 )
966 # Construct Timespan literals from 2-element tuples (A, B), where A and
967 # B are each either Time or None.
968 table.registerBinary(
969 "PAIR",
970 lhs=(Time, type(None)),
971 rhs=(Time, type(None)),
972 func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
973 adapt=False,
974 )
975 # Less-than and greater-than between Timespans.
976 table.registerBinary(
977 "<",
978 lhs=Timespan,
979 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool),
980 adapt=False,
981 )
982 table.registerBinary(
983 ">",
984 lhs=Timespan,
985 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool),
986 adapt=False,
987 )
988 # Less-than and greater-than between Timespans and Times.
989 table.registerBinary(
990 "<",
991 lhs=Timespan,
992 rhs=Time,
993 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool),
994 adapt=False,
995 )
996 table.registerBinary(
997 ">",
998 lhs=Timespan,
999 rhs=Time,
1000 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool),
1001 adapt=False,
1002 )
1003 table.registerBinary(
1004 "<",
1005 lhs=Time,
1006 rhs=Timespan,
1007 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool),
1008 adapt=False,
1009 )
1010 table.registerBinary(
1011 ">",
1012 lhs=Time,
1013 rhs=Timespan,
1014 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool),
1015 adapt=False,
1016 )
1017 # OVERLAPS operator between Timespans.
1018 table.registerBinary(
1019 "OVERLAPS",
1020 lhs=Timespan,
1021 func=TimespanWhereClauseConverter.overlaps,
1022 adapt=False,
1023 )
1024 # OVERLAPS operator between Timespans and Time is equivalent to
1025 # "contains", but expression language only has OVERLAPS to keep it
1026 # simple.
1027 table.registerBinary(
1028 "OVERLAPS",
1029 lhs=Timespan,
1030 rhs=Time,
1031 func=TimespanWhereClauseConverter.contains,
1032 adapt=False,
1033 )
1034 table.registerBinary(
1035 "OVERLAPS",
1036 lhs=Time,
1037 rhs=Timespan,
1038 func=lambda a, b: TimespanWhereClauseConverter.contains(b, a),
1039 adapt=False,
1040 )
1041 return table
class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """

    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Operator dispatch table used by the unary/binary/tuple visitors.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; try int first so that
        # integer-looking literals are not turned into floats.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values first, then special constants, then
        # dimension element columns / dimension keys.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            elif isinstance(value, (list, tuple, Set)):
                # Only accept list, tuple, and Set, general test for Iterables
                # is not reliable (e.g. astropy Time is Iterable).
                return SequenceWhereClauseConverter.fromLiteral(value)
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                # The identifier names the timespan of an element.
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                # The identifier names a regular column of an element table.
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column part: the identifier refers to a dimension key.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element), element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # A failed dispatch lookup is reported as a type error in the
            # user's expression; the KeyError itself is not informative.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            raise ExpressionTypeError(
                f"Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary "
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        #
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".')
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # Exact number of items in the inclusive range [start, stop] with
            # the given stride (zero for an empty range).  Computed
            # arithmetically so that arbitrarily large bounds are handled.
            # NOTE(review): assumes stride is a positive integer - confirm
            # against the range converter.
            count = max(0, (stop - start) // stride + 1)
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.column.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(item) for item in range(start, stop + 1, stride)]
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.  Calling or_() with no arguments is
        # deprecated in SQLAlchemy; false() is the documented default element
        # and is ignored whenever other clauses are present, so an empty list
        # of values yields a constant-false IN (and constant-true NOT IN).
        expr = sqlalchemy.sql.or_(sqlalchemy.sql.false(), *clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        # Parentheses only affect parsing; the converted operand is unchanged.
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        # Only 2-element tuples are recognized; they are dispatched as the
        # "PAIR" binary operator.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError, as the other operator visitors
            # do, so that only the user-facing error is reported.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")