Coverage for python/lsst/daf/butler/queries/expression_factory.py: 51%
186 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 10:53 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 10:53 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy")
32from abc import ABC, abstractmethod
33from collections.abc import Iterable
34from typing import TYPE_CHECKING
36from lsst.sphgeom import Region
38from ..dimensions import Dimension, DimensionElement, DimensionUniverse
39from . import tree
41if TYPE_CHECKING:
42 from .._timespan import Timespan
43 from ._query import Query
45# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression,
46# but it just returns OrderExpression and Predicate objects directly, because
47# we don't need to overload any operators or define any methods on those.
class ExpressionProxy(ABC):
    """Base class for objects that wrap a column expression.

    Subclasses provide the wrapped expression through the abstract
    ``_expression`` property; this base supplies the NULL test and the
    helpers used to turn operands into comparison predicates.
    """

    def __repr__(self) -> str:
        wrapped = self._expression
        return str(wrapped)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that tests whether this expression is NULL."""
        wrapped = self._expression
        return tree.Predicate.is_null(wrapped)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Anything that is not already a proxy is interpreted as a literal.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # This proxy is always the left-hand operand; ``other`` may be a
        # proxy or a plain value.
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        raise NotImplementedError()
class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` for simple single-value columns.

    Comparison operators return `tree.Predicate` objects, while arithmetic
    operators return new scalar proxies wrapping the combined expression.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression indicating that the sort on this expression
        should be reversed.
        """
        wrapped = self._expression
        return tree.Reversed(operand=wrapped)

    # Comparisons: each one delegates to the shared comparison helper with
    # the matching operator string.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, ">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, ">=")

    # Arithmetic: the forward methods put this proxy on the left, the
    # reflected (``__r*__``) methods put it on the right.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        falls within a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) for the slice.
        stop : `int` or `None`, optional
            Upper bound (exclusive) for the slice, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        wrapped = self._expression
        return tree.Predicate.in_range(wrapped, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value found in an iterable of other expressions.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` objects or values to be
            interpreted as literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        wrapped = self._expression
        # Each item is converted individually: proxies are unwrapped, other
        # values become literals.
        candidates = list(map(self._make_expression, others))
        return tree.Predicate.in_container(wrapped, candidates)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in a single-column selection from
        another query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        wrapped = self._expression
        selected = column._expression
        return tree.Predicate.in_query(wrapped, selected, query._tree)
class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that simply holds an existing expression.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        # Stored as-is and handed back unchanged by ``_expression``.
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        backing = self._expr
        return backing
class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` for timespan columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Return a boolean expression that tests whether this timespan
        overlaps another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        predicate = self._make_comparison(other, "overlaps")
        return predicate
class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Return a boolean expression that tests whether this region
        overlaps another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        predicate = self._make_comparison(other, "overlaps")
        return predicate
class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    non-dimension fields of the element's records.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Only a `Dimension` can be used as a column in its own right; other
        # elements are usable only through their fields.
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # __getattr__ only runs when normal lookup fails, so being asked for
        # ``_element`` means the instance was never initialized (e.g. while
        # copy/pickle is reconstructing it).  Reading ``self._element`` below
        # would then recurse without bound, so fail cleanly instead.
        if field == "_element":
            raise AttributeError(field)
        if field in self._element.schema.dimensions.names:
            if field not in self._element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(self._element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=self._element, field=field)
        except tree.InvalidQueryError:
            # The query-level error is not useful context for an attribute
            # lookup failure.
            raise AttributeError(field) from None
        return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """An expression proxy for this element's ``region`` field.

        Raises
        ------
        AttributeError
            Raised if a reference to a ``region`` field cannot be constructed
            for this element.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except tree.InvalidQueryError:
            raise AttributeError("region") from None
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """An expression proxy for this element's ``timespan`` field.

        Raises
        ------
        AttributeError
            Raised if a reference to a ``timespan`` field cannot be
            constructed for this element.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except tree.InvalidQueryError:
            raise AttributeError("timespan") from None
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry not in ("timespan", "region")]
        result.extend(self._element.schema.names)
        return result
class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # The membership test comes first so that unknown names (including
        # ``_dataset_type`` on an uninitialized instance) raise immediately.
        if field not in tree.DATASET_FIELD_NAMES:
            raise AttributeError(field)
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """An expression proxy for the dataset's ``timespan`` field."""
        # This must be a dataset field reference built from the dataset type
        # name.  This class has no ``_element`` attribute, so referring to one
        # raises AttributeError inside the property and Python silently falls
        # back to __getattr__, which returns a plain scalar proxy rather than
        # a `TimespanProxy`.
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        result = list(super().__dir__())
        # "timespan" will be added by delegation to super() and we don't want
        # it to appear twice.
        result.extend(name for name in tree.DATASET_FIELD_NAMES if name != "timespan")
        return result
425class ExpressionFactory:
426 """A factory for creating column expressions that uses operator overloading
427 to form a mini-language.
429 Instances of this class are usually obtained from
430 `Query.expression_factory`; see that property's documentation for more
431 information.
433 Parameters
434 ----------
435 universe : `DimensionUniverse`
436 Object that describes all dimensions.
437 """
439 def __init__(self, universe: DimensionUniverse):
440 self._universe = universe
442 def __getattr__(self, name: str) -> DimensionElementProxy:
443 try:
444 element = self._universe.elements[name]
445 except KeyError:
446 raise AttributeError(name)
447 return DimensionElementProxy(element)
449 def __getitem__(self, name: str) -> DatasetTypeProxy:
450 return DatasetTypeProxy(name)
452 def not_(self, operand: tree.Predicate) -> tree.Predicate:
453 """Apply a logical NOT operation to a boolean expression.
455 Parameters
456 ----------
457 operand : `tree.Predicate`
458 Expression to invetree.
460 Returns
461 -------
462 logical_not : `tree.Predicate`
463 A boolean expression that evaluates to the opposite of ``operand``.
464 """
465 return operand.logical_not()
467 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
468 """Combine a sequence of boolean expressions with logical AND.
470 Parameters
471 ----------
472 first : `tree.Predicate`
473 First operand (required).
474 *args
475 Additional operands.
477 Returns
478 -------
479 logical_and : `tree.Predicate`
480 A boolean expression that evaluates to `True` only if all operands
481 evaluate to `True.
482 """
483 return first.logical_and(*args)
485 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
486 """Combine a sequence of boolean expressions with logical OR.
488 Parameters
489 ----------
490 first : `tree.Predicate`
491 First operand (required).
492 *args
493 Additional operands.
495 Returns
496 -------
497 logical_or : `tree.Predicate`
498 A boolean expression that evaluates to `True` if any operand
499 evaluates to `True.
500 """
501 return first.logical_or(*args)
503 @staticmethod
504 def literal(value: object) -> ExpressionProxy:
505 """Return an expression proxy that represents a literal value.
507 Expression proxy objects obtained from this factory can generally be
508 compared directly to literals, so calling this method directly in user
509 code should rarely be necessary.
511 Parameters
512 ----------
513 value : `object`
514 Value to include as a literal in an expression tree.
516 Returns
517 -------
518 expression : `ExpressionProxy`
519 Expression wrapper for this literal.
520 """
521 expression = tree.make_column_literal(value)
522 match expression.expression_type:
523 case "timespan":
524 return TimespanProxy(expression)
525 case "region":
526 return RegionProxy(expression)
527 case "bool":
528 raise NotImplementedError("Boolean literals are not supported.")
529 case _:
530 return ResolvedScalarExpressionProxy(expression)
532 @staticmethod
533 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression:
534 """Return the column expression object that backs a proxy.
536 Parameters
537 ----------
538 proxy : `ExpressionProxy`
539 Proxy constructed via an `ExpressionFactory`.
541 Returns
542 -------
543 expression : `tree.ColumnExpression`
544 Underlying column expression object.
545 """
546 return proxy._expression