Coverage for python/lsst/daf/butler/registry/queries/expressions.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25from typing import Any, List, Mapping, Optional, Tuple, TYPE_CHECKING, Union
27import sqlalchemy
28from sqlalchemy.ext.compiler import compiles
30from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet
31from ...core.ddl import AstropyTimeNsecTai
32from .exprParser import Node, TreeVisitor
33from ._structs import QueryColumns
35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true
36 import astropy.time
class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Wrapper marking a TIMESTAMP column used inside a query expression.

    Time literals in expressions are `astropy.time.Time` instances that are
    converted to integer nanoseconds since Epoch, so a TIMESTAMP column has
    to be converted to that same representation before the two can be
    compared. This class only holds the actual column; the conversion is
    done by the dialect-specific compilation functions registered for this
    type.

    The wrapper is used only for expressions in a WHERE clause. Values of
    TIMESTAMP columns returned from queries still go through the standard
    mechanism and are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The TIMESTAMP column being wrapped.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        # Kept under a private name; read by the dialect-specific compilers.
        self._column = column
@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the SQLite dialect.

    SQLite's ``strftime`` function is used to convert the timestamp value to
    Unix seconds, which are then scaled to nanoseconds.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Wrapper whose ``_column`` attribute is the column to convert.
    compiler : `object`
        SQL compiler invoking this function; not used here.
    **kw
        Additional compilation arguments; not used here.

    Returns
    -------
    sql : `str`
        SQL text evaluating to integer nanoseconds.
    """
    # The column is referenced by its bare name, without table qualification.
    column_name = element._column.name
    return f"STRFTIME('%s', {column_name})*1000000000"
@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the PostgreSQL dialect.

    PostgreSQL's ``EXTRACT(epoch FROM timestamp)`` is used to obtain the
    epoch value, which is then scaled to nanoseconds.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Wrapper whose ``_column`` attribute is the column to convert.
    compiler : `object`
        SQL compiler invoking this function; not used here.
    **kw
        Additional compilation arguments; not used here.

    Returns
    -------
    sql : `str`
        SQL text evaluating to the column value in nanoseconds.
    """
    # The column is referenced by its bare name, without table qualification.
    column_name = element._column.name
    return f"EXTRACT(epoch FROM {column_name})*1000000000"
def categorizeIngestDateId(name: str) -> bool:
    """Check whether an identifier from a parsed expression refers to the
    ``ingest_date`` attribute of a dataset table.

    Parameters
    ----------
    name : `str`
        Identifier to categorize.

    Returns
    -------
    isIngestDate : `bool`
        `True` if the identifier is exactly ``ingest_date``, `False`
        otherwise.
    """
    # TODO: the attribute name is hardcoded for now; it may be better to
    # extract it from the schema, but it is not yet clear how to do that.
    isIngestDate = name == "ingest_date"
    return isIngestDate
def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize, either ``element`` or ``element.column``.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: it names a dimension and implies its primary key.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    if column not in element.RecordClass.fields.standard.names:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column
class InspectionVisitor(TreeVisitor[None]):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before the SQLAlchemy
    WHERE clause is actually constructed.

    Only identifiers carry information for this visitor; every other node
    type is ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Dimensions referenced through their primary key.
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        # Non-primary-key columns referenced, grouped by owning element.
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()
        # Set to True once an ``ingest_date`` identifier has been visited.
        self.hasIngestDate: bool = False

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if categorizeIngestDateId(name):
            self.hasIngestDate = True
            return
        element, column = categorizeElementId(self.universe, name)
        if column is None:
            # No column means the identifier is a dimension primary key.
            assert isinstance(element, Dimension)
            self.keys.add(element)
        else:
            self.metadata.setdefault(element, []).append(column)

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitTupleNode(self, items: Tuple[Any, ...], node: Node) -> None:
        # Docstring inherited from base class
        return None

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class
        return None
class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        # Set to True once an ``ingest_date`` identifier has been visited.
        self.hasIngestDate: bool = False

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into int when possible, otherwise float.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return sqlalchemy.sql.literal(value, type_=AstropyTimeNsecTai)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if categorizeIngestDateId(name):
            self.hasIngestDate = True
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            # Wrap the column so that it is compiled into the same integer
            # representation that time literals use.
            return _TimestampColumnElement(self.columns.datasets.ingestDate)
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            return self.elements[element].columns[column]
        # No column means the identifier is a dimension primary key.
        assert isinstance(element, Dimension)
        return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")
        return func(operand)

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")
        return func(lhs, rhs)

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Exact number of values in the range; stop is inclusive, so
            # e.g. 1..10 with stride 3 holds 4 values (1, 4, 7, 10). An
            # empty range (stop < start) contributes nothing.
            count = (stop - start) // stride + 1 if stop >= start else 0
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(value) for value in range(start, stop+1, stride)]

        if literals or not clauses:
            # add IN() in front of BETWEENs; when there is nothing at all to
            # match (e.g. only empty ranges were given) an empty IN() is
            # emitted so that OR below never receives zero operands.
            clauses.insert(0, lhs.in_(literals))

        expr = sqlalchemy.sql.or_(*clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitTupleNode(self, items: Tuple[sqlalchemy.sql.ColumnElement, ...], node: Node
                       ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from base class
        return sqlalchemy.sql.expression.Tuple(*items)

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        # NOTE: the value returned is a plain tuple, not a ColumnElement as
        # the annotation says; visitIsIn detects ranges by checking for
        # tuple instances.
        return (start, stop, stride or 1)

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")