Coverage for python/lsst/daf/butler/queries/expression_factory.py: 44%
156 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-05 10:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy")
32from collections.abc import Iterable
33from typing import TYPE_CHECKING, cast
35from lsst.sphgeom import Region
37from ..dimensions import Dimension, DimensionElement, DimensionUniverse
38from . import tree
40if TYPE_CHECKING:
41 from .._timespan import Timespan
42 from ._query import Query
# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression,
# but it just returns OrderExpression and Predicate objects directly, because
# we don't need to overload any operators or define any methods on those.
class ExpressionProxy:
    """Base wrapper around a column expression object.

    This class only stores the expression and provides the helpers shared by
    its subclasses, which add operator overloads and comparison methods.

    Parameters
    ----------
    expression : `tree.ColumnExpression`
        Expression object held by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expression = expression

    def __repr__(self) -> str:
        # The proxy has no presentation of its own; show the wrapped
        # expression's string form.
        return str(self._expression)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that tests whether this expression is NULL."""
        return tree.Predicate.is_null(self._expression)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        """Convert an operand into a column expression.

        Parameters
        ----------
        other : `object`
            Either an `ExpressionProxy`, which is unwrapped, or any other
            value, which is passed to `tree.make_column_literal`.

        Returns
        -------
        expression : `tree.ColumnExpression`
            Expression usable as an operand in the expression tree.
        """
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        """Build a predicate comparing this expression (left-hand side) to
        ``other`` (right-hand side) with the given operator.
        """
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=self._expression, b=rhs, operator=operator)
class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for simple single-value columns.

    Comparison operators return `tree.Predicate` objects, and arithmetic
    operators return new `ScalarExpressionProxy` objects. An operand that is
    not itself a proxy is converted by ``_make_expression``.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression that requests a reversed sort on this
        expression.
        """
        return tree.Reversed(operand=self._expression)

    # Comparisons: each returns a predicate instead of a `bool`, hence the
    # type-ignore on the overridden signatures.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, "<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, ">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, ">=")

    # Arithmetic: in the forward methods this proxy is the left operand, in
    # the reflected (``__r*__``) methods it is the right operand.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        rhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=self._expression, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=self._expression, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        rhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=self._expression, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=self._expression, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        rhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=self._expression, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=self._expression, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        rhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=self._expression, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=self._expression, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        rhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=self._expression, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        return ScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=self._expression, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression is
        within a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) for the slice.
        stop : `int` or `None`, optional
            Upper bound (exclusive) for the slice, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        return tree.Predicate.in_range(target, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in an iterable of other expressions.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` or values to be interpreted as
            literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        # Materialize the candidates as a list of column expressions.
        candidates = list(map(self._make_expression, others))
        return tree.Predicate.in_container(self._expression, candidates)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in a single-column selection from another
        query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        selected_column = column._expression
        return tree.Predicate.in_query(self._expression, selected_column, query._tree)
class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for timespan columns and literals."""

    # The bounds are returned as `ScalarExpressionProxy` rather than the base
    # `ExpressionProxy`: the base class defines no comparison or arithmetic
    # operators and no ``desc``, so a bare `ExpressionProxy` bound could not
    # be compared to anything or used for sorting.

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        return ScalarExpressionProxy(tree.UnaryExpression(operand=self._expression, operator="begin_of"))

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        return ScalarExpressionProxy(tree.UnaryExpression(operand=self._expression, operator="end_of"))

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Return a boolean expression representing an overlap test between
        this timespan and another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, "overlaps")
class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region-valued columns and literals."""

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a predicate that tests whether this region overlaps another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        overlap_test = self._make_comparison(other, "overlaps")
        return overlap_test
269class DimensionElementProxy:
270 """An expression-creation proxy for a dimension element logical table.
272 Parameters
273 ----------
274 element : `DimensionElement`
275 Element this object wraps.
277 Notes
278 -----
279 The (dynamic) attributes of this object are expression proxies for the
280 non-dimension fields of the element's records.
281 """
283 def __init__(self, element: DimensionElement):
284 self._element = element
286 def __repr__(self) -> str:
287 return self._element.name
289 def __getattr__(self, field: str) -> ExpressionProxy:
290 if field in self._element.schema.dimensions.names:
291 return DimensionProxy(self._element.dimensions[field])
292 try:
293 expression = tree.DimensionFieldReference(element=self._element, field=field)
294 except tree.InvalidQueryError:
295 raise AttributeError(field)
296 match expression.column_type:
297 case "region":
298 return RegionProxy(expression)
299 case "timespan":
300 return TimespanProxy(expression)
301 return ScalarExpressionProxy(expression)
303 def __dir__(self) -> list[str]:
304 result = list(super().__dir__())
305 result.extend(self._element.schema.names)
306 return result
class DimensionProxy(ScalarExpressionProxy, DimensionElementProxy):
    """An expression-creation proxy for a dimension logical table.

    Parameters
    ----------
    dimension : `Dimension`
        Dimension this object wraps.

    Notes
    -----
    This class combines record-field attribute access from
    `DimensionElementProxy` with direct interpretation as a dimension key
    column via `ScalarExpressionProxy`.  For example::

        x = query.expression_factory
        query.where(
            x.detector.purpose == "SCIENCE",  # field access
            x.detector > 100,  # direct usage as an expression
        )
    """

    # Narrows the type of the attribute set by `DimensionElementProxy`.
    _element: Dimension

    def __init__(self, dimension: Dimension):
        # Both bases are initialized explicitly because they take different
        # arguments: a key-reference expression and the dimension itself.
        ScalarExpressionProxy.__init__(self, tree.DimensionKeyReference(dimension=dimension))
        DimensionElementProxy.__init__(self, dimension)

    def __getattr__(self, field: str) -> ExpressionProxy:
        # ``__getattr__`` runs only after normal lookup has failed, so being
        # asked for ``_element`` means ``__init__`` has not run on this
        # instance (as happens while `copy` or `pickle` rebuild an object).
        # Reading ``self._element`` below would then recurse without bound.
        if field == "_element":
            raise AttributeError(field)
        # The primary key field is the dimension key column itself, which is
        # what this proxy already represents.
        if field == self._element.primary_key.name:
            return self
        return super().__getattr__(field)
class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # The set of attributes is fixed, but routing them through __getattr__
    # and __dir__ avoids repeating the list of field names here.  It also
    # leaves room for adding Datastore record fields someday.

    def __getattr__(self, field: str) -> ExpressionProxy:
        if field not in tree.DATASET_FIELD_NAMES:
            raise AttributeError(field)
        reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
        # Timespan fields get the proxy with timespan methods; everything
        # else is treated as a scalar.
        if reference.column_type == "timespan":
            return TimespanProxy(reference)
        return ScalarExpressionProxy(reference)

    def __dir__(self) -> list[str]:
        # Regular attributes first, then the dynamic dataset field names.
        return [*super().__dir__(), *tree.DATASET_FIELD_NAMES]
382class ExpressionFactory:
383 """A factory for creating column expressions that uses operator overloading
384 to form a mini-language.
386 Instances of this class are usually obtained from
387 `Query.expression_factory`; see that property's documentation for more
388 information.
390 Parameters
391 ----------
392 universe : `DimensionUniverse`
393 Object that describes all dimensions.
394 """
396 def __init__(self, universe: DimensionUniverse):
397 self._universe = universe
399 def __getattr__(self, name: str) -> DimensionElementProxy:
400 element = self._universe.elements[name]
401 if element in self._universe.dimensions:
402 return DimensionProxy(cast(Dimension, element))
403 return DimensionElementProxy(element)
405 def __getitem__(self, name: str) -> DatasetTypeProxy:
406 return DatasetTypeProxy(name)
408 def not_(self, operand: tree.Predicate) -> tree.Predicate:
409 """Apply a logical NOT operation to a boolean expression.
411 Parameters
412 ----------
413 operand : `tree.Predicate`
414 Expression to invetree.
416 Returns
417 -------
418 logical_not : `tree.Predicate`
419 A boolean expression that evaluates to the opposite of ``operand``.
420 """
421 return operand.logical_not()
423 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
424 """Combine a sequence of boolean expressions with logical AND.
426 Parameters
427 ----------
428 first : `tree.Predicate`
429 First operand (required).
430 *args
431 Additional operands.
433 Returns
434 -------
435 logical_and : `tree.Predicate`
436 A boolean expression that evaluates to `True` only if all operands
437 evaluate to `True.
438 """
439 return first.logical_and(*args)
441 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate:
442 """Combine a sequence of boolean expressions with logical OR.
444 Parameters
445 ----------
446 first : `tree.Predicate`
447 First operand (required).
448 *args
449 Additional operands.
451 Returns
452 -------
453 logical_or : `tree.Predicate`
454 A boolean expression that evaluates to `True` if any operand
455 evaluates to `True.
456 """
457 return first.logical_or(*args)
459 @staticmethod
460 def literal(value: object) -> ExpressionProxy:
461 """Return an expression proxy that represents a literal value.
463 Expression proxy objects obtained from this factory can generally be
464 compared directly to literals, so calling this method directly in user
465 code should rarely be necessary.
467 Parameters
468 ----------
469 value : `object`
470 Value to include as a literal in an expression tree.
472 Returns
473 -------
474 expression : `ExpressionProxy`
475 Expression wrapper for this literal.
476 """
477 expression = tree.make_column_literal(value)
478 match expression.expression_type:
479 case "timespan":
480 return TimespanProxy(expression)
481 case "region":
482 return RegionProxy(expression)
483 case "bool":
484 raise NotImplementedError("Boolean literals are not supported.")
485 case _:
486 return ScalarExpressionProxy(expression)
488 @staticmethod
489 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression:
490 """Return the column expression object that backs a proxy.
492 Parameters
493 ----------
494 proxy : `ExpressionProxy`
495 Proxy constructed via an `ExpressionFactory`.
497 Returns
498 -------
499 expression : `tree.ColumnExpression`
500 Underlying column expression object.
501 """
502 return proxy._expression