Coverage for python / lsst / daf / butler / queries / expression_factory.py: 44%
208 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ExpressionFactory", "ExpressionProxy", "RegionProxy", "ScalarExpressionProxy", "TimespanProxy")
32from abc import ABC, abstractmethod
33from collections.abc import Iterable
34from typing import TYPE_CHECKING
36import astropy.time
38from lsst.sphgeom import Region
40from .._exceptions import InvalidQueryError
41from ..dimensions import Dimension, DimensionElement, DimensionUniverse
42from . import tree
44if TYPE_CHECKING:
45 from .._timespan import Timespan
46 from ._query import Query
48# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression,
49# but it just returns OrderExpression and Predicate objects directly, because
50# we don't need to overload any operators or define any methods on those.
class ExpressionProxy(ABC):
    """Abstract base for objects that wrap a column expression.

    Concrete subclasses supply the wrapped expression through the
    ``_expression`` property; the helpers defined here use it to build
    predicates.
    """

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        # Subclasses return the column expression that backs the proxy.
        raise NotImplementedError()

    def __repr__(self) -> str:
        backing = self._expression
        return str(backing)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that is true when this expression is NULL."""
        return tree.Predicate.is_null(self._expression)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Anything that is not already a proxy is converted to a literal.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # Resolve our own expression before converting ``other`` so that any
        # error from this proxy surfaces first.
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)
class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` for columns that hold a single simple value.

    Comparison operators return `tree.Predicate` objects; arithmetic
    operators return new `ScalarExpressionProxy` objects.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression requesting that the sort on this
        expression be reversed.
        """
        operand = self._expression
        return tree.Reversed(operand=operand)

    def as_boolean(self) -> tree.Predicate:
        """Convert this expression to a `Predicate`, if it is a boolean.

        This implementation always raises; `BooleanScalarExpressionProxy`
        overrides it to perform the conversion.

        Returns
        -------
        predicate : `Predicate`
            This expression converted to a `Predicate`.

        Raises
        ------
        InvalidQueryError
            If this expression is not a boolean.
        """
        expr = self._expression
        message = (
            f"Expression '{expr}' with type"
            f" '{expr.column_type}' can't be used directly as a boolean value."
            " Use a comparison operator like '>' or '==' instead."
        )
        raise InvalidQueryError(message)

    # Comparison operators: each one builds a predicate rather than a bool.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # Arithmetic operators: the reflected variants put ``other`` on the left.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Build a boolean expression testing whether this expression lies
        within a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Inclusive lower bound of the range.
        stop : `int` or `None`, optional
            Exclusive upper bound of the range, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        bounds = {"start": start, "stop": stop, "step": step}
        return tree.Predicate.in_range(self._expression, **bounds)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Build a boolean expression testing whether this expression
        evaluates to one of the values in an iterable.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` objects or values to be
            interpreted as literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        candidates = list(map(self._make_expression, others))
        return tree.Predicate.in_container(member, candidates)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Build a boolean expression testing whether this expression
        evaluates to a value found in a single-column selection from another
        query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        selected = column._expression
        return tree.Predicate.in_query(member, selected, query._tree)

    def glob(self, pattern: str) -> tree.Predicate:
        """Build a boolean expression that matches this expression against a
        pattern.

        Parameters
        ----------
        pattern : `str`
            Pattern to use for matching.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(pattern, operator="glob")
class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that simply holds an existing expression.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Hand back exactly the object given to the constructor.
        return self._expr

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression
class BooleanScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that stands for a boolean column.

    Call `as_boolean()` on this object to obtain a `Predicate` before
    using it; accessing the backing expression directly raises
    `InvalidQueryError`.

    Parameters
    ----------
    expression : `.tree.ColumnReference`
        Boolean column reference that backs this proxy.

    Raises
    ------
    ValueError
        If the column type of ``expression`` is not ``"bool"``.
    """

    # This is a hack/work-around to make static typing work when referencing
    # dimension record metadata boolean columns.  From the perspective of
    # typing, anything boolean should be a `Predicate`, but the type system has
    # no way of knowing whether a given column is a bool or some other type.

    def __init__(self, expression: tree.ColumnReference) -> None:
        if expression.column_type == "bool":
            self._boolean_expression = expression
            return
        raise ValueError(f"Expression is a {expression.column_type}, not a 'bool': {expression}")

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Deliberately unusable: this forces callers through as_boolean().
        message = (
            f"Boolean expression '{self._boolean_expression}' can't be used directly in other expressions."
            " Call the 'as_boolean()' method to convert it to a Predicate instead."
        )
        raise InvalidQueryError(message)

    @property
    def is_null(self) -> tree.Predicate:
        # ``_expression`` raises on this class, so go through a resolved
        # proxy that wraps the same column.
        resolved = ResolvedScalarExpressionProxy(self._boolean_expression)
        return resolved.is_null

    def as_boolean(self) -> tree.Predicate:
        column = self._boolean_expression
        return tree.Predicate.from_bool_expression(column)
class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` for timespan columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan | astropy.time.Time) -> tree.Predicate:
        """Build a boolean expression for an overlap test between this
        timespan and another timespan or a datetime.

        Parameters
        ----------
        other : `TimespanProxy`, `Timespan`, or `astropy.time.Time`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")
class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a boolean expression for an overlap test between this
        region and another.

        Parameters
        ----------
        other : `RegionProxy` or `lsst.sphgeom.Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")
class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    non-dimension fields of the element's records.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Only true dimensions have a key column; other elements can be used
        # to reach their fields but are not column expressions themselves.
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        else:
            raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        """Return a proxy for a field or related dimension of the element.

        Parameters
        ----------
        field : `str`
            Name of the attribute that normal lookup failed to find.

        Returns
        -------
        proxy : `ScalarExpressionProxy`
            This object for a dimension self-reference, a new
            `DimensionElementProxy` for a related dimension, or a proxy for
            the named record field.

        Raises
        ------
        AttributeError
            If ``field`` does not name a field of the element, or if the
            instance has not been initialized.
        """
        if field == "_element":
            # __getattr__ only runs when normal lookup fails, so reaching
            # here means the instance has no ``_element`` yet (e.g. while
            # being reconstructed by ``copy`` or ``pickle``).  Reading
            # ``self._element`` below would then recurse without bound.
            raise AttributeError(field)
        if field in self._element.schema.dimensions.names:
            if field not in self._element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(self._element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=self._element, field=field)
        except InvalidQueryError:
            raise AttributeError(field) from None
        if expression.column_type == "bool":
            return BooleanScalarExpressionProxy(expression)
        else:
            return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """A proxy for the element's ``region`` field.

        Raises `AttributeError` if the field reference cannot be built.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except InvalidQueryError:
            # Suppress the chained context, as the other accessors here do.
            raise AttributeError("region") from None
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """A proxy for the element's ``timespan`` field.

        Raises `AttributeError` if the field reference cannot be built.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except InvalidQueryError:
            raise AttributeError("timespan") from None
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry != "timespan" and entry != "region"]
        result.extend(self._element.schema.names)
        return result
class DatasetTypeProxy:
    """An expression-creation proxy for the logical table of a dataset type.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        if tree.is_dataset_field(field):
            reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
            return ResolvedScalarExpressionProxy(reference)
        raise AttributeError(field)

    @property
    def timespan(self) -> TimespanProxy:
        try:
            reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        except InvalidQueryError:
            raise AttributeError("timespan") from None
        else:
            return TimespanProxy(reference)

    def __dir__(self) -> list[str]:
        names = list(super().__dir__())
        # "timespan" is already present via super() because it is a property
        # on the class, so leave it out here to avoid a duplicate entry.
        names += [name for name in tree.DATASET_FIELD_NAMES if name != "timespan"]
        return names
504class ExpressionFactory:
505 """A factory for creating column expressions that uses operator overloading
506 to form a mini-language.
508 Instances of this class are usually obtained from
509 `Query.expression_factory`; see that property's documentation for more
510 information.
512 Parameters
513 ----------
514 universe : `DimensionUniverse`
515 Object that describes all dimensions.
516 """
518 def __init__(self, universe: DimensionUniverse):
519 self._universe = universe
521 def __getattr__(self, name: str) -> DimensionElementProxy:
522 try:
523 element = self._universe.elements[name]
524 except KeyError:
525 raise AttributeError(name)
526 return DimensionElementProxy(element)
528 def __getitem__(self, name: str) -> DatasetTypeProxy:
529 return DatasetTypeProxy(name)
531 def not_(self, operand: tree.Predicate) -> tree.Predicate:
532 """Apply a logical NOT operation to a boolean expression.
534 Parameters
535 ----------
536 operand : `tree.Predicate`
537 Expression to invetree.
539 Returns
540 -------
541 logical_not : `tree.Predicate`
542 A boolean expression that evaluates to the opposite of ``operand``.
543 """
544 return operand.logical_not()
546 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
547 """Combine a sequence of boolean expressions with logical AND.
549 Parameters
550 ----------
551 first : `tree.Predicate`
552 First operand (required).
553 *args
554 Additional operands.
556 Returns
557 -------
558 logical_and : `tree.Predicate`
559 A boolean expression that evaluates to `True` only if all operands
560 evaluate to `True`.
561 """
562 return first.logical_and(*args)
564 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
565 """Combine a sequence of boolean expressions with logical OR.
567 Parameters
568 ----------
569 first : `tree.Predicate`
570 First operand (required).
571 *args
572 Additional operands.
574 Returns
575 -------
576 logical_or : `tree.Predicate`
577 A boolean expression that evaluates to `True` if any operand
578 evaluates to `True`.
579 """
580 return first.logical_or(*args)
582 @staticmethod
583 def literal(value: object) -> ExpressionProxy:
584 """Return an expression proxy that represents a literal value.
586 Expression proxy objects obtained from this factory can generally be
587 compared directly to literals, so calling this method directly in user
588 code should rarely be necessary.
590 Parameters
591 ----------
592 value : `object`
593 Value to include as a literal in an expression tree.
595 Returns
596 -------
597 expression : `ExpressionProxy`
598 Expression wrapper for this literal.
599 """
600 expression = tree.make_column_literal(value)
601 match expression.expression_type:
602 case "timespan":
603 return TimespanProxy(expression)
604 case "region":
605 return RegionProxy(expression)
606 case "bool":
607 raise NotImplementedError("Boolean literals are not supported.")
608 case _:
609 return ResolvedScalarExpressionProxy(expression)
611 @staticmethod
612 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression:
613 """Return the column expression object that backs a proxy.
615 Parameters
616 ----------
617 proxy : `ExpressionProxy`
618 Proxy constructed via an `ExpressionFactory`.
620 Returns
621 -------
622 expression : `tree.ColumnExpression`
623 Underlying column expression object.
624 """
625 return proxy._expression