Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25from typing import Any, List, Optional, Tuple, TYPE_CHECKING, Union
27import sqlalchemy
29from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet
30from .exprParser import Node, TreeVisitor
31from ._structs import QueryColumns
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 import astropy.time
def categorizeIdentifier(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.  Either a bare name (``"visit"``) or a
        dotted ``"<element>.<column>"`` pair; only the first ``.`` is treated
        as a separator.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')
    if column:
        # Dotted form: "<element>.<column>".
        try:
            element = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension element with name '{table}'.") from err
        if isinstance(element, Dimension) and column == element.primaryKey.name:
            # Allow e.g. "visit.id = x" instead of just "visit = x"; this
            # can be clearer.
            return element, None
        elif column in element.graph.names:
            # User said something like "patch.tract = x" or
            # "tract.tract = x" instead of just "tract = x" or
            # "tract.id = x", which is at least needlessly confusing and
            # possibly not actually a column name, though we can guess
            # what they were trying to do.
            # Encourage them to clean that up and try again.
            raise RuntimeError(
                f"Invalid reference to '{table}.{column}' in expression; please use "
                f"'{column}' or '{column}.{universe.dimensions[column].primaryKey.name}' instead."
            )
        else:
            # Anything else must be a field of the element's record class.
            if column not in element.RecordClass.__slots__:
                raise LookupError(f"Column '{column}' not found in table for {element}.")
            return element, column
    else:
        # Bare form: the name must be a dimension, and it stands for that
        # dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None
class InspectionVisitor(TreeVisitor[None]):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before the SQLAlchemy
    WHERE clause is actually built from the same tree.

    Identifiers that name a dimension's primary key are collected in
    ``keys``; identifiers that name any other column are collected in
    ``metadata``, grouped by the element that owns the column.  Every other
    kind of node is ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        # Dimensions referenced through their primary key.
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        # Non-key columns referenced, per element, in order of appearance
        # (duplicates are kept).
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()
        self.universe = universe

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Literals never reference a dimension element.
        return None

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # A missing column means "primary key", which only a Dimension
            # has.
            assert isinstance(element, Dimension)
            self.keys.add(element)
            return None
        referenced = self.metadata.setdefault(element, [])
        referenced.append(column)
        return None

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None
class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so that
        # integer-looking text stays an integer.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return sqlalchemy.sql.literal(value)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Non-key column: look it up in the table joined in for the
            # element.
            return self.elements[element].columns[column]
        else:
            # Primary key: only a Dimension has one.
            assert isinstance(element, Dimension)
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Exact number of values the range expands to (stop is
            # inclusive).  Plain floor division of the span by the stride
            # under-counts whenever the stride does not divide the span.
            count = len(range(start, stop + 1, stride))
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(value) for value in range(start, stop + 1, stride)]

        if literals or not clauses:
            # add IN() in front of BETWEENs.  When there is nothing at all to
            # match (no literals and only empty ranges) still emit an empty
            # IN(), which matches no rows; OR-ing zero clauses would instead
            # yield an empty expression that constrains nothing.
            clauses.insert(0, lhs.in_(literals))

        expr = sqlalchemy.sql.or_(*clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        # NOTE: despite the annotation this returns a plain tuple, not a
        # column element; visitIsIn recognizes ranges by that tuple type.
        return (start, stop, stride or 1)