Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
26from sqlalchemy.sql import not_, or_, and_, literal, FromClause
28from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet
29from .exprParser import TreeVisitor
30from ._structs import QueryColumns
def categorizeIdentifier(universe: DimensionUniverse, name: str):
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize; either a bare name (``"visit"``) or a
        dotted ``"<element>.<column>"`` pair.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated up
        to a context where the error can be interpreted by a human.
    """
    # Only the first '.' splits the identifier; an identifier with nothing
    # after the dot is handled like a bare name.
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: it must name a dimension, and refers to that
        # dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' in expression; please use "
            f"'{column}' or '{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else must be a field of the element's record class.
    if column not in element.RecordClass.__slots__:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column
class InspectionVisitor(TreeVisitor):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before the SQLAlchemy
    WHERE clause itself is built.

    Identifiers that resolve to a dimension primary key are collected in
    ``keys``; identifiers that resolve to any other column are collected in
    ``metadata``, keyed by element with a list of column names as the value.
    All other node types are ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Dimensions referenced through their primary key.
        self.keys = NamedValueSet()
        # Element -> list of non-primary-key column names referenced.
        self.metadata = NamedKeyDict()

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        target, field = categorizeIdentifier(self.universe, name)
        if field is None:
            # Primary-key reference: only the dimension itself is recorded.
            self.keys.add(target)
            return None
        columns = self.metadata.setdefault(target, [])
        columns.append(field)
        return None

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None
class ClauseVisitor(TreeVisitor):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: or_(x, y),
                 "AND": lambda x, y: and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so
        # that integer-looking text stays an integer.
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        return literal(value)

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return literal(value)

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return literal(value)

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Non-key column: look it up in the table for this element.
            return self.elements[element].columns[column]
        else:
            # Primary-key reference: delegate to the query's column struct.
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Number of values in the inclusive range start..stop; for
            # stop >= start and a positive stride this is exactly how many
            # literals range(start, stop + 1, stride) would add below.
            count = (stop - start) // stride + 1
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [literal(value) for value in range(start, stop + 1, stride)]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        # NOTE(review): if `values` is empty this calls or_() with no
        # arguments; presumably the parser never produces an empty list --
        # confirm against the grammar.
        expr = or_(*clauses)
        if not_in:
            expr = not_(expr)

        return expr

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        return (start, stop, stride or 1)