Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 27%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25import dataclasses
26from typing import (
27 Any,
28 List,
29 Mapping,
30 Optional,
31 Sequence,
32 Set,
33 Tuple,
34 TYPE_CHECKING,
35 Union,
36)
38import sqlalchemy
39from sqlalchemy.ext.compiler import compiles
41from ...core import (
42 DataCoordinate,
43 DimensionUniverse,
44 Dimension,
45 DimensionElement,
46 DimensionGraph,
47 GovernorDimension,
48 NamedKeyDict,
49 NamedValueSet,
50)
51from ...core.ddl import AstropyTimeNsecTai
52from ..wildcards import EllipsisType, Ellipsis
53from .exprParser import Node, NormalForm, NormalFormVisitor, TreeVisitor
54from ._structs import QueryColumns
if TYPE_CHECKING:
    import astropy.time
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Special ColumnElement type used for TIMESTAMP columns in expressions.

    TIMESTAMP columns in expressions are usually compared to time literals
    which are `astropy.time.Time` instances that are converted to integer
    nanoseconds since Epoch. For comparison we need to convert TIMESTAMP
    column value to the same type. This type is a wrapper for actual column
    that has special dialect-specific compilation methods defined below
    transforming column in that common type.

    This mechanism is only used for expressions in WHERE clause, values of the
    TIMESTAMP columns returned from queries are still handled by standard
    mechanism and they are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The TIMESTAMP column to wrap.
    """
    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        # Stored privately; read directly by the dialect-specific
        # @compiles handlers defined below.
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compile a `_TimestampColumnElement` for the SQLite dialect.

    SQLite defines a ``strftime`` function that can be used to convert a
    timestamp value to Unix seconds; multiplying by 1e9 yields the integer
    nanoseconds-since-epoch scale used for time literals in expressions.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        The wrapped TIMESTAMP column being compiled.
    compiler : `Any`
        SQLAlchemy statement compiler in use for the enclosing query.
    **kw : `Mapping` [ `str`, `Any` ]
        Additional keyword arguments forwarded by SQLAlchemy.

    Returns
    -------
    sql : `str`
        SQL fragment that evaluates to nanoseconds since epoch.
    """
    # Render the wrapped column through the compiler rather than using its
    # bare name, so any table qualification or aliasing required by the
    # enclosing query is preserved.
    return f"STRFTIME('%s', {compiler.process(element._column, **kw)})*1000000000"
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Compile a `_TimestampColumnElement` for the PostgreSQL dialect.

    PostgreSQL can use the ``EXTRACT(epoch FROM timestamp)`` function to
    obtain Unix seconds; multiplying by 1e9 yields the integer
    nanoseconds-since-epoch scale used for time literals in expressions.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        The wrapped TIMESTAMP column being compiled.
    compiler : `Any`
        SQLAlchemy statement compiler in use for the enclosing query.
    **kw : `Mapping` [ `str`, `Any` ]
        Additional keyword arguments forwarded by SQLAlchemy.

    Returns
    -------
    sql : `str`
        SQL fragment that evaluates to nanoseconds since epoch.
    """
    # Render the wrapped column through the compiler rather than using its
    # bare name, so any table qualification or aliasing required by the
    # enclosing query is preserved.
    return f"EXTRACT(epoch FROM {compiler.process(element._column, **kw)})*1000000000"
def categorizeIngestDateId(name: str) -> bool:
    """Report whether a parsed-expression identifier names the ingest_date
    attribute of a dataset table.

    Parameters
    ----------
    name : `str`
        Identifier to categorize.

    Returns
    -------
    isIngestDate : `bool`
        `True` if and only if ``name`` is exactly ``ingest_date``.
    """
    # TODO: the identifier is hardcoded for now; it may be better to derive
    # it from the schema, but how to do that is not yet clear.
    return "ingest_date" == name
def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')
    if not column:
        # Bare identifier: it must name a dimension, and the implied column
        # is that dimension's primary key.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None
    # Dotted identifier: resolve the element first.
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err
    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None
    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )
    if column not in element.RecordClass.fields.standard.names:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column
@dataclasses.dataclass
class InspectionSummary:
    """Base class for objects used by `CheckVisitor` and `InspectionVisitor`
    to gather information about a parsed expression.
    """

    def update(self, other: InspectionSummary) -> None:
        """Absorb all dimensions and columns from ``other`` into ``self``.

        Parameters
        ----------
        other : `InspectionSummary`
            The other summary object.
        """
        self.dimensions.update(other.dimensions)
        for element, columns in other.columns.items():
            self.columns.setdefault(element, set()).update(columns)
        if other.hasIngestDate:
            # The flag is sticky: once any branch sees ingest_date, the
            # merged summary reports it.
            self.hasIngestDate = True

    dimensions: NamedValueSet[Dimension] = dataclasses.field(default_factory=NamedValueSet)
    """Dimensions whose primary keys or dependencies were referenced anywhere
    in this branch (`NamedValueSet` [ `Dimension` ]).
    """

    columns: NamedKeyDict[DimensionElement, Set[str]] = dataclasses.field(default_factory=NamedKeyDict)
    """Dimension element tables whose columns were referenced anywhere in this
    branch (`NamedKeyDict` [ `DimensionElement`, `set` [ `str` ] ]).
    """

    hasIngestDate: bool = False
    """Whether this expression includes the special dataset ingest date
    identifier (`bool`).
    """
@dataclasses.dataclass
class TreeSummary(InspectionSummary):
    """Result object used by `InspectionVisitor` to gather information about
    a parsed expression.

    Notes
    -----
    TreeSummary adds attributes that allow dimension equivalence expressions
    (e.g. "tract=4") to be recognized when they appear in simple contexts
    (surrounded only by ANDs and ORs). When `InspectionVisitor` is used on its
    own (i.e. when ``check=False`` in the query code), these don't do anything,
    but they don't cost much, either. They are used by `CheckVisitor` when it
    delegates to `InspectionVisitor` to see what governor dimension values are
    set in a branch of the normal-form expression.
    """

    def merge(self, other: TreeSummary, isEq: bool = False) -> TreeSummary:
        """Merge ``other`` into ``self``, making ``self`` a summary of both
        expression tree branches.

        Parameters
        ----------
        other : `TreeSummary`
            The other summary object.
        isEq : `bool`, optional
            If `True` (`False` is default), these summaries are being combined
            via the equality operator.

        Returns
        -------
        self : `TreeSummary`
            The merged summary (updated in-place).
        """
        self.update(other)
        if not isEq:
            # Only equality comparisons can form a data ID key/value pair.
            self.dataIdKey = None
            self.dataIdValue = None
        elif self.isDataIdKeyOnly() and other.isDataIdValueOnly():
            # "key = value": adopt the literal from the other side.
            self.dataIdValue = other.dataIdValue
        elif self.isDataIdValueOnly() and other.isDataIdKeyOnly():
            # "value = key": adopt the dimension from the other side.
            self.dataIdKey = other.dataIdKey
        else:
            self.dataIdKey = None
            self.dataIdValue = None
        return self

    def isDataIdKeyOnly(self) -> bool:
        """Test whether this branch is _just_ a data ID key identifier.
        """
        return self.dataIdValue is None and self.dataIdKey is not None

    def isDataIdValueOnly(self) -> bool:
        """Test whether this branch is _just_ a literal value that may be
        used as the value in a data ID key-value pair.
        """
        return self.dataIdValue is not None and self.dataIdKey is None

    dataIdKey: Optional[Dimension] = None
    """A `Dimension` that is (if `dataIdValue` is not `None`) or may be
    (if `dataIdValue` is `None`) fully identified by a literal value in this
    branch.
    """

    dataIdValue: Optional[str] = None
    """A literal value that constrains (if `dataIdKey` is not `None`) or may
    constrain (if `dataIdKey` is `None`) a dimension in this branch.

    This is always a `str` or `None`, but it may need to be coerced to `int`
    to reflect the actual user intent.
    """
class InspectionVisitor(TreeVisitor[TreeSummary]):
    """Tree visitor that identifies the dimension elements a parsed
    expression needs in a query, prior to actually constructing a SQLAlchemy
    WHERE clause from it.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe

    def visitNumericLiteral(self, value: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # A bare literal may turn out to be the value half of a data ID
        # key/value pair; record it as a candidate.
        return TreeSummary(dataIdValue=value)

    def visitStringLiteral(self, value: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return TreeSummary(dataIdValue=value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        # Time literals never participate in data ID equivalences.
        return TreeSummary()

    def visitIdentifier(self, name: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if categorizeIngestDateId(name):
            return TreeSummary(hasIngestDate=True)
        element, column = categorizeElementId(self.universe, name)
        referenced = NamedValueSet(element.graph.dimensions)
        if column is not None:
            return TreeSummary(
                dimensions=referenced,
                columns=NamedKeyDict({element: {column}})
            )
        assert isinstance(element, Dimension)
        return TreeSummary(dimensions=referenced, dataIdKey=element)

    def visitUnaryOp(self, operator: str, operand: TreeSummary, node: Node
                     ) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return operand

    def visitBinaryOp(self, operator: str, lhs: TreeSummary, rhs: TreeSummary,
                      node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return lhs.merge(rhs, isEq=(operator == "="))

    def visitIsIn(self, lhs: TreeSummary, values: List[TreeSummary], not_in: bool,
                  node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitIsIn
        summary = lhs
        for value in values:
            summary = summary.merge(value)
        return summary

    def visitParens(self, expression: TreeSummary, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[TreeSummary, ...], node: Node) -> TreeSummary:
        # Docstring inherited from base class
        combined = TreeSummary()
        for item in items:
            combined.merge(item)
        return combined

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return TreeSummary()

    def visitPointNode(self, ra: TreeSummary, dec: TreeSummary, node: Node) -> TreeSummary:
        # Docstring inherited from base class
        return TreeSummary()
@dataclasses.dataclass
class InnerSummary(InspectionSummary):
    """Result object used by `CheckVisitor` to gather referenced dimensions
    and tables from an inner group of AND'd together expression branches, and
    check them for consistency and completeness.
    """

    governors: NamedKeyDict[GovernorDimension, str] = dataclasses.field(default_factory=NamedKeyDict)
    """Mapping containing the values of all governor dimensions that are
    equated with literal values in this expression branch
    (`NamedKeyDict` [ `GovernorDimension`, `str` ]).
    """
@dataclasses.dataclass
class OuterSummary(InspectionSummary):
    """Result object used by `CheckVisitor` to gather referenced dimensions,
    tables, and governor dimension values from the entire expression.
    """

    governors: NamedKeyDict[GovernorDimension, Union[Set[str], EllipsisType]] \
        = dataclasses.field(default_factory=NamedKeyDict)
    """Mapping containing all values that appear in this expression for any
    governor dimension relevant to the query.

    Mapping values may be a `set` of `str` to indicate that only these values
    are permitted for a dimension, or ``...`` to indicate that the values for
    that governor are not fully constrained by this expression.
    """
class CheckVisitor(NormalFormVisitor[TreeSummary, InnerSummary, OuterSummary]):
    """An implementation of `NormalFormVisitor` that identifies the dimensions
    and tables that need to be included in a query while performing some checks
    for completeness and consistency.

    Parameters
    ----------
    dataId : `DataCoordinate`
        Dimension values that are fully known in advance.
    graph : `DimensionGraph`
        The dimensions the query would include in the absence of this
        expression.
    """
    def __init__(self, dataId: DataCoordinate, graph: DimensionGraph):
        self.dataId = dataId
        self.graph = graph
        # Per-branch inspection is delegated to an InspectionVisitor that
        # shares this data ID's dimension universe.
        self._branchVisitor = InspectionVisitor(dataId.universe)

    def visitBranch(self, node: Node) -> TreeSummary:
        # Docstring inherited from NormalFormVisitor.
        return node.visit(self._branchVisitor)

    def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> InnerSummary:
        # Docstring inherited from NormalFormVisitor.
        # Disjunctive normal form means inner branches are AND'd together...
        assert form is NormalForm.DISJUNCTIVE
        # ...and that means each branch we iterate over together below
        # constrains the others, and they all need to be consistent. Moreover,
        # because outer branches are OR'd together, we also know that if
        # something is missing from one of these branches (like a governor
        # dimension value like the instrument or skymap needed to interpret a
        # visit or tract number), it really is missing, because there's no way
        # some other inner branch can constrain it.
        #
        # That is, except the data ID the visitor was passed at construction;
        # that's AND'd to the entire expression later, and thus it affects all
        # branches. To take care of that, we add any governor values it
        # contains to the summary in advance.
        summary = InnerSummary()
        summary.governors.update((k, self.dataId[k]) for k in self.dataId.graph.governors)  # type: ignore
        # Finally, we loop over those branches.
        for branch in branches:
            # Update the sets of dimensions and columns we've seen anywhere in
            # the expression in any context.
            summary.update(branch)
            # Test whether this branch has a form like '<dimension>=<value>'
            # (or equivalent; categorizeElementId is smart enough to see that
            # e.g. 'detector.id=4' is equivalent to 'detector=4').
            # If so, and it's a governor dimension, remember that we've
            # constrained it on this branch, and make sure it's consistent
            # with any other constraints on any other branches it's AND'd
            # with.
            if isinstance(branch.dataIdKey, GovernorDimension) and branch.dataIdValue is not None:
                governor = branch.dataIdKey
                value = summary.governors.setdefault(governor, branch.dataIdValue)
                if value != branch.dataIdValue:
                    # Expression says something like "instrument='HSC' AND
                    # instrument='DECam'", or data ID has one and expression
                    # has the other.
                    if governor in self.dataId:
                        raise RuntimeError(
                            f"Conflict between expression containing {governor.name}={branch.dataIdValue!r} "
                            f"and data ID with {governor.name}={value!r}."
                        )
                    else:
                        raise RuntimeError(
                            f"Conflicting literal values for {governor.name} in expression: "
                            f"{value!r} != {branch.dataIdValue!r}."
                        )
        # Now that we know which governor values we've constrained, see if any
        # are missing, i.e. if the expression contains something like "visit=X"
        # without saying what instrument that visit corresponds to. This rules
        # out a lot of accidents, but it also rules out possibly-legitimate
        # multi-instrument queries like "visit.seeing < 0.7". But it's not
        # unreasonable to ask the user to be explicit about the instruments
        # they want to consider to work around this restriction, and that's
        # what we do. Note that if someone does write an expression like
        #
        #  (instrument='HSC' OR instrument='DECam') AND visit.seeing < 0.7
        #
        # then in disjunctive normal form that will become
        #
        #  (instrument='HSC' AND visit.seeing < 0.7)
        #  OR (instrument='DECam' AND visit.seeing < 0.7)
        #
        # i.e. each instrument will get its own outer branch and the logic here
        # still works (that sort of thing is why we convert to normal form,
        # after all).
        governorsNeededInBranch: NamedValueSet[GovernorDimension] = NamedValueSet()
        for dimension in summary.dimensions:
            governorsNeededInBranch.update(dimension.graph.governors)
        if not governorsNeededInBranch.issubset(summary.governors.keys()):
            missing = NamedValueSet(governorsNeededInBranch - summary.governors.keys())
            raise RuntimeError(
                f"No value(s) for governor dimensions {missing} in expression that references dependent "
                "dimensions. 'Governor' dimensions must always be specified completely in either the "
                "query expression (via simple 'name=<value>' terms, not 'IN' terms) or in a data ID passed "
                "to the query method."
            )
        return summary

    def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> OuterSummary:
        # Docstring inherited from NormalFormVisitor.
        # Disjunctive normal form means outer branches are OR'd together.
        assert form is NormalForm.DISJUNCTIVE
        # Iterate over branches in first pass to gather all dimensions and
        # columns referenced. This aggregation is for the full query, so we
        # don't care whether things are joined by AND or OR (or + or -, etc).
        summary = OuterSummary()
        for branch in branches:
            summary.update(branch)
        # See if we've referenced any dimensions that weren't in the original
        # query graph; if so, we update that to include them. This is what
        # lets a user say "tract=X" on the command line (well, "skymap=Y AND
        # tract=X" - logic in visitInner checks for that) when running a task
        # like ISR that has nothing to do with skymaps.
        if not summary.dimensions.issubset(self.graph.dimensions):
            self.graph = DimensionGraph(
                self.graph.universe,
                dimensions=(summary.dimensions | self.graph.dimensions),
            )
        # Set up a dict of empty sets, with all of the governors this query
        # involves as keys.
        summary.governors.update((k, set()) for k in self.graph.governors)
        # Iterate over branches again to see if there are any branches that
        # don't constrain a particular governor (because these branches are
        # OR'd together, that means there is no constraint on that governor at
        # all); if that's the case, we set the dict value to ``Ellipsis``
        # (``...``). If a governor is constrained by all branches, we update
        # the set with the values that governor can have.
        for branch in branches:
            for governor in summary.governors:
                currentValues = summary.governors[governor]
                if currentValues is not Ellipsis:
                    branchValue = branch.governors.get(governor)
                    if branchValue is None:
                        # This governor is unconstrained in this branch, so
                        # no other branch can constrain it.
                        summary.governors[governor] = Ellipsis
                    else:
                        currentValues.add(branchValue)
        return summary
class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Tree visitor that translates a parsed expression tree into a
    SQLAlchemy WHERE clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {
        "NOT": lambda x: sqlalchemy.sql.not_(x),
        "+": lambda x: +x,
        "-": lambda x: -x,
    }
    """Mapping of unary operator names to corresponding functions."""

    binaryOps = {
        "OR": lambda x, y: sqlalchemy.sql.or_(x, y),
        "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
        "=": lambda x, y: x == y,
        "!=": lambda x, y: x != y,
        "<": lambda x, y: x < y,
        "<=": lambda x, y: x <= y,
        ">": lambda x, y: x > y,
        ">=": lambda x, y: x >= y,
        "+": lambda x, y: x + y,
        "-": lambda x, y: x - y,
        "*": lambda x, y: x * y,
        "/": lambda x, y: x / y,
        "%": lambda x, y: x % y,
    }
    """Mapping of binary operator names to corresponding functions."""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.hasIngestDate: bool = False

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # The parser hands us a raw string; prefer int, fall back to float.
        number: Union[int, float]
        try:
            number = int(value)
        except ValueError:
            number = float(value)
        return sqlalchemy.sql.literal(number)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        # AstropyTimeNsecTai converts the literal to integer nanoseconds.
        return sqlalchemy.sql.literal(value, type_=AstropyTimeNsecTai)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if categorizeIngestDateId(name):
            self.hasIngestDate = True
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            # Wrap so dialect-specific compilation converts the TIMESTAMP
            # column to the nanosecond scale used by time literals.
            return _TimestampColumnElement(self.columns.datasets.ingestDate)
        element, column = categorizeElementId(self.universe, name)
        if column is None:
            # Primary key of a dimension.
            assert isinstance(element, Dimension)
            return self.columns.getKeyColumn(element)
        return self.elements[element].columns[column]

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")
        return func(operand)

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")
        return func(lhs, rhs)

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges; a range is represented
        # by a tuple (start, stop, stride). We need to transform each range
        # into some SQL construct; the simplest would be to expand it into
        # literals appended to the same list, but that could become too long.
        # Instead we impose a large limit on the total number of items in
        # IN() and if a range would exceed that limit we generate:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or, for the NOT IN case:
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        max_in_items = 1000

        # Split the incoming list into plain literals and range triples.
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges = []
        for item in values:
            (ranges if isinstance(item, tuple) else literals).append(item)

        clauses = []
        for start, stop, stride in ranges:
            n_values = (stop - start + 1) // stride
            if len(literals) + n_values <= max_in_items:
                # Small enough: expand into literals (stop is inclusive).
                literals.extend(sqlalchemy.sql.literal(v) for v in range(start, stop + 1, stride))
            else:
                # X BETWEEN START AND STOP
                #   AND MOD(X, STRIDE) = MOD(START, STRIDE)
                between = lhs.between(start, stop)
                if stride != 1:
                    between = sqlalchemy.sql.and_(between, (lhs % stride) == (start % stride))
                clauses.append(between)

        if literals:
            # The IN() clause goes in front of any BETWEEN clauses.
            clauses.insert(0, lhs.in_(literals))

        expr = sqlalchemy.sql.or_(*clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitTupleNode(self, items: Tuple[sqlalchemy.sql.ColumnElement, ...], node: Node
                       ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from base class
        return sqlalchemy.sql.expression.Tuple(*items)

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # Return a plain triple and let the enclosing clause handle it;
        # a stride of None means the same as 1.
        return (start, stop, stride or 1)

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class
        # Placeholder for future extension: the syntax is enabled but
        # actual use is not supported yet.
        raise NotImplementedError("POINT() function is not supported yet")