Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
26from sqlalchemy.sql import not_, or_, and_, literal, FromClause
28from ...core.utils import NamedValueSet, NamedKeyDict
29from ...core import DimensionUniverse, Dimension, DimensionElement
30from .exprParser import TreeVisitor
31from ._structs import QueryColumns
def categorizeIdentifier(universe: DimensionUniverse, name: str):
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize; either a bare name (``"visit"``) or a
        dotted ``"<element>.<column>"`` pair.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated up
        to a context where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier (no '.', or nothing after it): it must name a
        # dimension, and it stands for that dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    # Dotted identifier: "<element>.<column>".
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' in expression; please use "
            f"'{column}' or '{column}.{universe[column].primaryKey.name}' instead."
        )

    if column not in element.RecordClass.__slots__:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column
class InspectionVisitor(TreeVisitor):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before a SQLAlchemy WHERE
    clause is actually constructed from the expression.

    Only identifiers are inspected; every other node type is ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Elements referenced through their primary key.
        self.keys = NamedValueSet()
        # Maps each element to the list of non-key column names referenced.
        self.metadata = NamedKeyDict()

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        target, columnName = categorizeIdentifier(self.universe, name)
        if columnName is None:
            # Primary-key reference: only the element itself is needed.
            self.keys.add(target)
            return None
        self.metadata.setdefault(target, []).append(columnName)
        return None

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None
class ClauseVisitor(TreeVisitor):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: or_(x, y),
                 "AND": lambda x, y: and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        return literal(value)

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return literal(value)

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return literal(value)

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Non-key column: look it up in the table for that element.
            return self.elements[element].columns[column]
        else:
            # Primary-key reference: use the query's key column.
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Number of values the range would expand to; stop is inclusive,
            # so e.g. 1..10:3 yields 1, 4, 7, 10 (four items).  The formula
            # is exact for a positive stride; an empty range counts as 0.
            count = max(0, (stop - start) // stride + 1)
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [literal(value) for value in range(start, stop + 1, stride)]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        expr = or_(*clauses)
        if not_in:
            expr = not_(expr)

        return expr

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        return (start, stop, stride or 1)