Coverage for python/lsst/daf/butler/queries/expression_factory.py: 51%
187 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 02:51 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 02:51 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy")
32from abc import ABC, abstractmethod
33from collections.abc import Iterable
34from typing import TYPE_CHECKING
36from lsst.sphgeom import Region
38from .._exceptions import InvalidQueryError
39from ..dimensions import Dimension, DimensionElement, DimensionUniverse
40from . import tree
42if TYPE_CHECKING:
43 from .._timespan import Timespan
44 from ._query import Query
46# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression,
47# but it just returns OrderExpression and Predicate objects directly, because
48# we don't need to overload any operators or define any methods on those.
class ExpressionProxy(ABC):
    """Abstract base for objects that wrap a column expression.

    Subclasses provide the wrapped expression through the ``_expression``
    property; this base class supplies a NULL test and the helpers used to
    turn operands into expressions and comparisons.
    """

    def __repr__(self) -> str:
        wrapped = self._expression
        return str(wrapped)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that is true when this expression is NULL."""
        return tree.Predicate.is_null(self._expression)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Anything that is not already a proxy is treated as a literal value;
        # proxies simply hand over the expression they wrap.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # Left operand is resolved before the right one.
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        # Concrete subclasses return the column expression they wrap.
        raise NotImplementedError()
class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` for simple single-value columns.

    Comparison operators produce `tree.Predicate` objects, while arithmetic
    operators produce new scalar proxies wrapping the combined expression.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression requesting that the sort on this expression
        be reversed.
        """
        return tree.Reversed(operand=self._expression)

    # -- Comparisons: each returns a predicate, not a bool. -----------------

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # -- Arithmetic: each returns a new scalar proxy. ------------------------
    # In every method the left operand is resolved before the right one.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    # -- Membership tests. ---------------------------------------------------

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Build a boolean expression testing whether this expression lies in
        a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Inclusive lower bound of the range.
        stop : `int` or `None`, optional
            Exclusive upper bound of the range; `None` means unbounded.
        step : `int`, optional
            Spacing between consecutive integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        column = self._expression
        return tree.Predicate.in_range(column, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Build a boolean expression testing whether this expression's value
        is one of the items of an iterable.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            Items to test against; each may be an `ExpressionProxy` or a
            value to be interpreted as a literal.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        column = self._expression
        candidates = list(map(self._make_expression, others))
        return tree.Predicate.in_container(column, candidates)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Build a boolean expression testing whether this expression's value
        appears in a single-column selection from another query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        selected = column._expression
        return tree.Predicate.in_query(member, selected, query._tree)
class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A concrete `ScalarExpressionProxy` that holds the expression it was
    constructed with.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        # Kept privately and surfaced through the ``_expression`` property.
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        backing = self._expr
        return backing
class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` for timespan columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Build a boolean expression that tests whether this timespan
        overlaps another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")
class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a boolean expression that tests whether this region overlaps
        another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")
class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    non-dimension fields of the element's records.

    The proxy itself can be used as a scalar expression only when the wrapped
    element is a `Dimension`; otherwise resolving it raises `TypeError`.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        else:
            raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # __getattr__ only runs after normal lookup has failed.  If the
        # missing name is private (in particular ``_element`` itself, on an
        # instance created without __init__, e.g. by `copy.copy`), touching
        # ``self._element`` below would re-enter this method forever.
        if field.startswith("_"):
            raise AttributeError(field)
        if field in self._element.schema.dimensions.names:
            if field not in self._element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(self._element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=self._element, field=field)
        except InvalidQueryError:
            raise AttributeError(field)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """Proxy for this element's ``region`` field.

        Raises `AttributeError` if the field reference cannot be constructed.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except InvalidQueryError:
            raise AttributeError("region")
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """Proxy for this element's ``timespan`` field.

        Raises `AttributeError` if the field reference cannot be constructed.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except InvalidQueryError:
            raise AttributeError("timespan")
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry != "timespan" and entry != "region"]
        result.extend(self._element.schema.names)
        return result
class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        if field not in tree.DATASET_FIELD_NAMES:
            raise AttributeError(field)
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """Expression proxy for the ``timespan`` dataset field."""
        # This must reference the dataset field, not a dimension element:
        # this class has no ``_element`` attribute, and an AttributeError
        # raised inside a property makes Python fall back to __getattr__,
        # which would return a scalar proxy instead of a `TimespanProxy`.
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        result = list(super().__dir__())
        # "timespan" will be added by delegation to super() and we don't want
        # it to appear twice.
        result.extend(name for name in tree.DATASET_FIELD_NAMES if name != "timespan")
        return result
426class ExpressionFactory:
427 """A factory for creating column expressions that uses operator overloading
428 to form a mini-language.
430 Instances of this class are usually obtained from
431 `Query.expression_factory`; see that property's documentation for more
432 information.
434 Parameters
435 ----------
436 universe : `DimensionUniverse`
437 Object that describes all dimensions.
438 """
440 def __init__(self, universe: DimensionUniverse):
441 self._universe = universe
443 def __getattr__(self, name: str) -> DimensionElementProxy:
444 try:
445 element = self._universe.elements[name]
446 except KeyError:
447 raise AttributeError(name)
448 return DimensionElementProxy(element)
450 def __getitem__(self, name: str) -> DatasetTypeProxy:
451 return DatasetTypeProxy(name)
453 def not_(self, operand: tree.Predicate) -> tree.Predicate:
454 """Apply a logical NOT operation to a boolean expression.
456 Parameters
457 ----------
458 operand : `tree.Predicate`
459 Expression to invetree.
461 Returns
462 -------
463 logical_not : `tree.Predicate`
464 A boolean expression that evaluates to the opposite of ``operand``.
465 """
466 return operand.logical_not()
468 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
469 """Combine a sequence of boolean expressions with logical AND.
471 Parameters
472 ----------
473 first : `tree.Predicate`
474 First operand (required).
475 *args
476 Additional operands.
478 Returns
479 -------
480 logical_and : `tree.Predicate`
481 A boolean expression that evaluates to `True` only if all operands
482 evaluate to `True.
483 """
484 return first.logical_and(*args)
486 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
487 """Combine a sequence of boolean expressions with logical OR.
489 Parameters
490 ----------
491 first : `tree.Predicate`
492 First operand (required).
493 *args
494 Additional operands.
496 Returns
497 -------
498 logical_or : `tree.Predicate`
499 A boolean expression that evaluates to `True` if any operand
500 evaluates to `True.
501 """
502 return first.logical_or(*args)
504 @staticmethod
505 def literal(value: object) -> ExpressionProxy:
506 """Return an expression proxy that represents a literal value.
508 Expression proxy objects obtained from this factory can generally be
509 compared directly to literals, so calling this method directly in user
510 code should rarely be necessary.
512 Parameters
513 ----------
514 value : `object`
515 Value to include as a literal in an expression tree.
517 Returns
518 -------
519 expression : `ExpressionProxy`
520 Expression wrapper for this literal.
521 """
522 expression = tree.make_column_literal(value)
523 match expression.expression_type:
524 case "timespan":
525 return TimespanProxy(expression)
526 case "region":
527 return RegionProxy(expression)
528 case "bool":
529 raise NotImplementedError("Boolean literals are not supported.")
530 case _:
531 return ResolvedScalarExpressionProxy(expression)
533 @staticmethod
534 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression:
535 """Return the column expression object that backs a proxy.
537 Parameters
538 ----------
539 proxy : `ExpressionProxy`
540 Proxy constructed via an `ExpressionFactory`.
542 Returns
543 -------
544 expression : `tree.ColumnExpression`
545 Underlying column expression object.
546 """
547 return proxy._expression