Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25from typing import Any, List, Optional, Tuple, TYPE_CHECKING, Union
27import sqlalchemy
29from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet
30from .exprParser import Node, TreeVisitor
31from ._structs import QueryColumns
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 import astropy.time
def categorizeIdentifier(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    # Only the first '.' separates the table from the column; the separator
    # itself is not needed.
    table, _, column = name.partition('.')
    if column:
        try:
            element = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension element with name '{table}'.") from err
        if isinstance(element, Dimension) and column == element.primaryKey.name:
            # Allow e.g. "visit.id = x" instead of just "visit = x"; this
            # can be clearer.
            return element, None
        elif column in element.graph.names:
            # User said something like "patch.tract = x" or
            # "tract.tract = x" instead of just "tract = x" or
            # "tract.id = x", which is at least needlessly confusing and
            # possibly not actually a column name, though we can guess
            # what they were trying to do.
            # Encourage them to clean that up and try again.
            raise RuntimeError(
                f"Invalid reference to '{table}.{column}' "  # type: ignore
                f"in expression; please use '{column}' or "
                f"'{column}.{universe[column].primaryKey.name}' instead."
            )
        else:
            if column not in element.RecordClass.fields.standard.names:
                raise LookupError(f"Column '{column}' not found in table for {element}.")
            return element, column
    else:
        # No column part: the whole identifier is looked up in the universe
        # and reported with `None` as the column.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None
class InspectionVisitor(TreeVisitor[None]):
    """Expression-tree visitor that records which dimension elements an
    expression refers to, so that they can be included in a query before the
    SQLAlchemy WHERE clause is actually constructed.

    Only identifiers contribute information; every other node type is
    visited without effect.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Dimensions whose primary key is referenced by the expression.
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        # Names of non-key columns referenced, grouped by element.
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Primary-key reference: remember the dimension itself.
            assert isinstance(element, Dimension)
            self.keys.add(element)
            return
        # Reference to a regular column of the element's table.
        self.metadata.setdefault(element, []).append(column)

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None
class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; try int first so that
        # integer literals are not turned into floats.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return sqlalchemy.sql.literal(value)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Regular column: look it up in the table joined for the element.
            return self.elements[element].columns[column]
        else:
            # Primary-key reference: use the query's key column.
            assert isinstance(element, Dimension)
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Exact number of values the inclusive range expands to; the
            # expression (stop - start + 1) // stride undercounts (e.g.
            # 1..3:2 holds two values, not one) and could let IN() grow
            # past the limit.
            expanded = range(start, stop + 1, stride)
            if len(literals) + len(expanded) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals.extend(sqlalchemy.sql.literal(value) for value in expanded)

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        if clauses:
            expr = sqlalchemy.sql.or_(*clauses)
        else:
            # Nothing to match against (e.g. only empty ranges were given):
            # membership in an empty set is false.  An or_() without
            # operands would render no condition at all.
            expr = sqlalchemy.sql.false()
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.  Note that the
        # value returned is a plain tuple (consumed by visitIsIn), not a
        # SQLAlchemy element as the annotation suggests.
        return (start, stop, stride or 1)