Coverage for python/lsst/daf/butler/queries/_expression_strings.py: 26%
122 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-11 03:15 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-11 03:15 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from collections.abc import Set
31from typing import Literal, NamedTuple, TypeAlias
33import astropy.time
35from .._exceptions import InvalidQueryError
36from .._timespan import Timespan
37from ..column_spec import ColumnType
38from ..dimensions import DimensionUniverse
39from ..registry.queries.expressions.categorize import ExpressionConstant, categorizeConstant
40from ..registry.queries.expressions.parser import Node, RangeLiteral, TreeVisitor, parse_expression
41from ._identifiers import IdentifierContext, interpret_identifier
42from .tree import (
43 BinaryExpression,
44 ColumnExpression,
45 ComparisonOperator,
46 LiteralValue,
47 Predicate,
48 UnaryExpression,
49 make_column_literal,
50)
# Type of a value that can be bound to an identifier in an expression: either
# a single literal or a list/tuple/set of literals.  The tuple member is
# spelled ``tuple[LiteralValue, ...]`` so that tuples of any length are
# covered; a bare ``tuple[LiteralValue]`` would describe 1-tuples only.
BindValue = LiteralValue | list[LiteralValue] | tuple[LiteralValue, ...] | Set[LiteralValue]
def convert_expression_string_to_predicate(
    expression: str, *, context: IdentifierContext, universe: DimensionUniverse
) -> Predicate:
    """Parse a Butler query expression string and translate it into a
    `Predicate` suitable for a QueryTree.

    Parameters
    ----------
    expression : `str`
        Butler expression query string, as used by the old query system to
        specify filtering.
    context : `IdentifierContext`
        Contextual information that helps determine the meaning of an
        identifier used in a query.
    universe : `DimensionUniverse`
        Dimension metadata for the Butler database being queried.

    Returns
    -------
    predicate : `Predicate`
        Predicate corresponding to that filter, for use in `QueryTree`.

    Raises
    ------
    InvalidQueryError
        Raised if the expression string cannot be parsed, or if the
        conversion visitor rejects part of the parsed tree.
    """
    try:
        parsed = parse_expression(expression)
    except Exception as exc:
        # Any parser failure is reported uniformly, with the original error
        # kept as the cause.
        raise InvalidQueryError(f"Failed to parse expression '{expression}'") from exc

    result = parsed.visit(_ConversionVisitor(context, universe))
    assert isinstance(
        result, Predicate
    ), "The grammar should guarantee that we get a predicate back at the top level."
    return result
91class _ColExpr(NamedTuple):
92 """Represents a portion of the original expression that has been converted
93 to a ColumnExpression object.
94 """
96 # This wrapper object mostly exists to help with typing and match() --
97 # ColumnExpression is a big discriminated union, and mypy was having a lot
98 # of trouble dealing with it in the context of _VisitorResult's extra
99 # layers of union.
101 value: ColumnExpression
103 @property
104 def column_type(self) -> ColumnType:
105 return self.value.column_type
108class _Null:
109 """Class representing a literal 'null' value in the expression."""
111 column_type: Literal["null"] = "null"
114class _RangeLiteral(NamedTuple):
115 """Class representing a range expression."""
117 value: RangeLiteral
118 column_type: Literal["range"] = "range"
121class _Sequence(NamedTuple):
122 value: list[ColumnExpression]
123 column_type: Literal["sequence"] = "sequence"
# Everything a _ConversionVisitor method may hand back: a finished predicate,
# a wrapped column expression, or one of the intermediate marker types.
_VisitorResult: TypeAlias = (
    Predicate
    | _ColExpr
    | _Null
    | _RangeLiteral
    | _Sequence
)
129class _ConversionVisitor(TreeVisitor[_VisitorResult]):
130 def __init__(self, context: IdentifierContext, universe: DimensionUniverse):
131 super().__init__()
132 self.context = context
133 self.universe = universe
135 def visitBinaryOp(
136 self, operator: str, lhs: _VisitorResult, rhs: _VisitorResult, node: Node
137 ) -> _VisitorResult:
138 match (operator, lhs, rhs):
139 # Handle boolean operators.
140 case ["OR", Predicate() as lhs, Predicate() as rhs]:
141 return lhs.logical_or(rhs)
142 case ["AND", Predicate() as lhs, Predicate() as rhs]:
143 return lhs.logical_and(rhs)
145 # Handle comparison operators.
146 case [("=" | "!=" | "<" | ">" | "<=" | ">="), _ColExpr() as lhs, _ColExpr() as rhs]:
147 return Predicate.compare(
148 a=lhs.value, b=rhs.value, operator=_convert_comparison_operator(operator)
149 )
151 # Allow equality comparisons with None/NULL. We don't have an 'IS'
152 # operator.
153 case ["=", _ColExpr() as lhs, _Null()]:
154 return Predicate.is_null(lhs.value)
155 case ["!=", _ColExpr() as lhs, _Null()]:
156 return Predicate.is_null(lhs.value).logical_not()
157 case ["=", _Null(), _ColExpr() as rhs]:
158 return Predicate.is_null(rhs.value)
159 case ["!=", _Null(), _ColExpr() as rhs]:
160 return Predicate.is_null(rhs.value).logical_not()
162 # Handle arithmetic operations
163 case [("+" | "-" | "*" | "/" | "%") as op, _ColExpr() as lhs, _ColExpr() as rhs]:
164 return _ColExpr(BinaryExpression(a=lhs.value, b=rhs.value, operator=op))
166 raise InvalidQueryError(
167 f"Invalid types {lhs.column_type}, {rhs.column_type} for binary operator {operator!r} "
168 f"in expression {node!s}."
169 )
171 def visitIsIn(
172 self, lhs: _VisitorResult, values: list[_VisitorResult], not_in: bool, node: Node
173 ) -> _VisitorResult:
174 raise NotImplementedError("IN not supported yet")
176 def visitIdentifier(self, name: str, node: Node) -> _VisitorResult:
177 name = name.lower()
179 if name in self.context.bind:
180 value = self.context.bind[name]
181 # Lists of values do not have a direct representation in the new
182 # query system, so we have to handle them separately here.
183 if isinstance(value, list | tuple | Set):
184 literals: list[ColumnExpression] = [make_column_literal(item) for item in value]
185 types = set({item.column_type for item in literals})
186 if len(types) > 1:
187 raise InvalidQueryError(
188 f"Mismatched types in bind iterable: {value} has a mix of {types}."
189 )
190 return _Sequence(literals)
192 # The other constants are handled in interpret_identifier().
193 if categorizeConstant(name) == ExpressionConstant.NULL:
194 return _Null()
196 return _ColExpr(interpret_identifier(self.context, name))
198 def visitNumericLiteral(self, value: str, node: Node) -> _VisitorResult:
199 numeric: int | float
200 try:
201 numeric = int(value)
202 except ValueError:
203 # int() raises for float-like strings
204 numeric = float(value)
205 return _make_literal(numeric)
207 def visitParens(self, expression: _VisitorResult, node: Node) -> _VisitorResult:
208 return expression
210 def visitPointNode(self, ra: _VisitorResult, dec: _VisitorResult, node: Node) -> _VisitorResult:
211 raise NotImplementedError("POINT() function is not supported yet")
213 def visitRangeLiteral(
214 self, start: int, stop: int, stride: int | None, node: RangeLiteral
215 ) -> _VisitorResult:
216 # Consumed by visitIsIn.
217 return _RangeLiteral(node)
219 def visitStringLiteral(self, value: str, node: Node) -> _VisitorResult:
220 return _make_literal(value)
222 def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> _VisitorResult:
223 return _make_literal(value)
225 def visitTupleNode(self, items: tuple[_VisitorResult, ...], node: Node) -> _VisitorResult:
226 if len(items) != 2:
227 raise InvalidQueryError(f"Timespan tuple should have exactly two items (begin, end) in '{node}'")
229 begin = _to_timespan_bound(items[0], node)
230 end = _to_timespan_bound(items[1], node)
231 return _make_literal(Timespan(begin, end))
233 def visitUnaryOp(self, operator: str, operand: _VisitorResult, node: Node) -> _VisitorResult:
234 # Docstring inherited.
235 match (operator, operand):
236 case ["NOT", Predicate() as operand]:
237 return operand.logical_not()
238 case ["+", _ColExpr(column_type="int" | "float") as operand]:
239 # + is a no-op.
240 return operand
241 case ["-", _ColExpr(column_type="int" | "float", value=expr)]:
242 return _ColExpr(UnaryExpression(operand=expr, operator="-"))
243 raise InvalidQueryError(
244 f"Unary operator {operator!r} is not valid for operand of type {operand.column_type} in {node!s}."
245 )
def _make_literal(value: LiteralValue) -> _ColExpr:
    """Convert a plain literal value to a column literal and wrap it in a
    `_ColExpr`.
    """
    literal = make_column_literal(value)
    return _ColExpr(literal)
def _to_timespan_bound(value: _VisitorResult, node: Node) -> astropy.time.Time | None:
    """Extract one bound of a timespan from a visited tuple item.

    Parameters
    ----------
    value : `_VisitorResult`
        Visitor result for one item of the tuple.
    node : `Node`
        The tuple node, used only in the error message.

    Returns
    -------
    bound : `astropy.time.Time` or `None`
        The value of a datetime expression, or `None` for a null item.

    Raises
    ------
    InvalidQueryError
        Raised if the item is neither a datetime expression nor null.
    """
    if isinstance(value, _Null):
        return None
    if isinstance(value, _ColExpr):
        inner = value.value
        if inner.expression_type == "datetime":
            return inner.value

    raise InvalidQueryError(
        f'Invalid type in timespan tuple "{node}" '
        '(Note that date/time strings must be preceded by "T" to be recognized).'
    )
265def _convert_comparison_operator(value: str) -> ComparisonOperator:
266 """Convert an expression-string comparison operator to the format
267 used by QueryTree.
268 """
269 match value:
270 case "=":
271 return "=="
272 case "OVERLAPS":
273 return "overlaps"
274 case ("!=" | "<" | ">" | "<=" | ">=") as op:
275 return op
276 case _:
277 raise AssertionError(f"Unhandled comparison operator {value}")