Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25from typing import Any, List, Optional, Tuple, TYPE_CHECKING, Union
27import sqlalchemy
29from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet
30from .exprParser import Node, TreeVisitor
31from ._structs import QueryColumns
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 import astropy.time
def categorizeIdentifier(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.  Either a bare element name (``"visit"``)
        or a dotted ``"<element>.<column>"`` pair; only the first ``.`` is
        treated as the separator.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # No column part (this also covers a trailing dot such as "visit."):
        # the identifier names the element itself, i.e. its primary key.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else must be one of the element's ordinary record fields.
    if column not in element.RecordClass.fields.standard.names:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column
class InspectionVisitor(TreeVisitor[None]):
    """Tree visitor that collects the dimension elements referenced by an
    expression, so they can be included in a query before a SQLAlchemy
    WHERE clause is actually built from the expression.

    Only identifiers contribute anything; every other node type is a no-op.
    After the walk, ``keys`` holds the dimensions referenced through their
    primary key and ``metadata`` maps each element to the list of its
    non-key columns that were referenced (in visit order, duplicates kept).

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Primary-key reference: remember the dimension itself.
            assert isinstance(element, Dimension)
            self.keys.add(element)
            return None
        # Non-key column: record it under the element that owns it.
        referenced = self.metadata.setdefault(element, [])
        referenced.append(column)
        return None

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitTupleNode(self, items: Tuple[Any, ...], node: Node) -> None:
        # Docstring inherited from base class
        return None

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class
        return None
class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so
        # that integer-looking text stays an integer.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        # NOTE(review): the Time object is handed to literal() without an
        # explicit column type; presumably the caller's backend knows how to
        # bind it -- confirm.
        return sqlalchemy.sql.literal(value)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Non-key column: look it up in the table joined for the element.
            return self.elements[element].columns[column]
        else:
            # Primary-key reference: use the query's key column.
            assert isinstance(element, Dimension)
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # stop is inclusive; len(range) gives the exact member count
            # (including 0 for an empty range such as 5..1).
            members = range(start, stop + 1, stride)
            if len(literals) + len(members) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # small enough: expand the range into the literal list
                literals += [sqlalchemy.sql.literal(value) for value in members]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        # Seed with false() so that an empty clause list (nothing to match)
        # yields a false predicate rather than an empty expression; false()
        # is dropped by or_() whenever real clauses are present.
        expr = sqlalchemy.sql.or_(sqlalchemy.sql.false(), *clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitTupleNode(self, items: Tuple[sqlalchemy.sql.ColumnElement, ...], node: Node
                       ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from base class
        return sqlalchemy.sql.expression.Tuple(*items)

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        # NOTE: despite the annotation this is a plain tuple, which is how
        # visitIsIn tells ranges apart from literals.
        return (start, stop, stride or 1)

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")