Coverage for python / lsst / daf / butler / queries / tree / _predicate.py: 42%
284 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
# Names exported by ``from <module> import *``.  The concrete leaf classes
# defined in this module (`LogicalNot`, `IsNull`, `Comparison`, ...) are not
# listed here; only `Predicate` and the type aliases are.
__all__ = (
    "ComparisonOperator",
    "LogicalNotOperand",
    "Predicate",
    "PredicateLeaf",
    "PredicateOperands",
)
38import itertools
39from abc import ABC, abstractmethod
40from collections.abc import Iterable
41from typing import TYPE_CHECKING, Annotated, Literal, TypeAlias, TypeVar, cast, final
43import pydantic
45from ..._exceptions import InvalidQueryError
46from ._base import QueryTreeBase
47from ._column_expression import (
48 ColumnExpression,
49 ColumnReference,
50 is_numeric,
51 is_one_datetime_and_one_ingest_date,
52 is_one_timespan_and_one_datetime,
53 is_one_timespan_and_one_ingest_date,
54)
56if TYPE_CHECKING:
57 from ..visitors import PredicateVisitFlags, PredicateVisitor
58 from ._column_set import ColumnSet
59 from ._query_tree import QueryTree
# Closed set of operator spellings accepted by `Comparison.operator`.
ComparisonOperator: TypeAlias = Literal["==", "!=", "<", ">", ">=", "<=", "overlaps", "glob"]


# Type variables used to parameterize `PredicateVisitor` in the ``visit``
# signatures below: `_A` is the return type of `Predicate.visit` and `_L` is
# the return type of every leaf ``visit`` method.  `_O` only appears as the
# second `PredicateVisitor` parameter in this module.
_L = TypeVar("_L")
_A = TypeVar("_A")
_O = TypeVar("_O")
class PredicateLeafBase(QueryTreeBase, ABC):
    """Base class for leaf nodes of the `Predicate` tree.

    This is a closed hierarchy whose concrete, `~typing.final` derived classes
    are members of the `PredicateLeaf` union.  Type annotations should
    generally use that union rather than this technically-open base class.
    """

    @abstractmethod
    def gather_required_columns(self, columns: ColumnSet) -> None:
        """Record every column needed to evaluate this predicate leaf.

        Parameters
        ----------
        columns : `ColumnSet`
            Set of columns to modify in place.
        """
        raise NotImplementedError()

    @abstractmethod
    def gather_governors(self, governors: set[str]) -> None:
        """Record the governor dimensions that must be fully identified for
        this column expression to be sound.

        Parameters
        ----------
        governors : `set` [ `str` ]
            Set of governor dimension names to modify in place.
        """
        raise NotImplementedError()

    def invert(self) -> PredicateLeaf:
        """Return a new leaf that is the logical NOT of this one.

        Returns
        -------
        inverted : `PredicateLeaf`
            A `LogicalNot` leaf wrapping this one.
        """
        # The cast only informs the type checker; `LogicalNot` overrides this
        # method, so ``self`` is never a `LogicalNot` when we get here.
        wrapped = cast("LogicalNotOperand", self)
        return LogicalNot.model_construct(operand=wrapped)

    @abstractmethod
    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        """Dispatch to the visitor method that handles this kind of leaf.

        Parameters
        ----------
        visitor : `PredicateVisitor`
            Visitor to invoke a method on.
        flags : `PredicateVisitFlags`
            Flags that provide information about where this leaf appears in
            the larger predicate tree.

        Returns
        -------
        result : `object`
            Forwarded result from the visitor.
        """
        raise NotImplementedError()
@final
class Predicate(QueryTreeBase):
    """A boolean column expression.

    Notes
    -----
    Predicate is the only class representing a boolean column expression that
    should be used outside of this module (though the objects it nests appear
    in its serialized form and hence are not fully private).  It provides
    several `classmethod` factories for constructing those nested types inside
    a `Predicate` instance, and `PredicateVisitor` subclasses should be used
    to process them.
    """

    operands: PredicateOperands
    """Nested tuple of operands, with outer items combined via AND and inner
    items combined via OR.
    """

    @property
    def column_type(self) -> Literal["bool"]:
        """A string enumeration value representing the type of the column
        expression.
        """
        return "bool"

    @classmethod
    def from_bool(cls, value: bool) -> Predicate:
        """Construct a predicate that is unconditionally `True` or `False`.

        Parameters
        ----------
        value : `bool`
            Value the predicate should evaluate to.

        Returns
        -------
        predicate : `Predicate`
            Predicate that evaluates to the given boolean value.
        """
        # ``operands`` is interpreted as
        #
        #     all(any(or_group) for or_group in operands)
        #
        # so, exactly as with `all` and `any` on empty sequences, having no
        # OR groups at all means True and one empty OR group means False.
        if value:
            constant: PredicateOperands = ()
        else:
            constant = ((),)
        return cls.model_construct(operands=constant)

    @classmethod
    def from_bool_expression(cls, value: ColumnReference) -> Predicate:
        """Construct a predicate that wraps a boolean column reference and
        takes on its value.

        Parameters
        ----------
        value : `ColumnReference`
            Boolean-valued column reference to convert to a predicate.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the expression.

        Raises
        ------
        ValueError
            Raised if the column type of ``value`` is not ``"bool"``.
        """
        if value.column_type == "bool":
            return cls._from_leaf(BooleanWrapper(operand=value))
        raise ValueError(f"ColumnExpression must have column type 'bool', not '{value.column_type}'")

    @classmethod
    def compare(cls, a: ColumnExpression, operator: ComparisonOperator, b: ColumnExpression) -> Predicate:
        """Construct a predicate representing a binary comparison between
        two non-boolean column expressions.

        Parameters
        ----------
        a : `ColumnExpression`
            First column expression in the comparison.
        operator : `str`
            Enumerated string representing the comparison operator to apply.
            May be any of "==", "!=", "<", ">", "<=", ">=", "overlaps", or
            "glob".
        b : `ColumnExpression`
            Second column expression in the comparison.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the comparison.

        Notes
        -----
        The `Comparison` leaf validates that the operator is applicable to
        the column types of both operands when it is constructed here.
        """
        leaf = Comparison(a=a, operator=operator, b=b)
        return cls._from_leaf(leaf)

    @classmethod
    def is_null(cls, operand: ColumnExpression) -> Predicate:
        """Construct a predicate that tests whether a column expression is
        NULL.

        Parameters
        ----------
        operand : `ColumnExpression`
            Column expression to test.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the NULL check.
        """
        leaf = IsNull(operand=operand)
        return cls._from_leaf(leaf)

    @classmethod
    def in_container(cls, member: ColumnExpression, container: Iterable[ColumnExpression]) -> Predicate:
        """Construct a predicate that tests whether one column expression is
        a member of a container of other column expressions.

        Parameters
        ----------
        member : `ColumnExpression`
            Column expression that may be a member of the container.
        container : `~collections.abc.Iterable` [ `ColumnExpression` ]
            Container of column expressions to test for membership in.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the membership test.
        """
        # Materialize the iterable once; the leaf stores a tuple.
        items = tuple(container)
        return cls._from_leaf(InContainer(member=member, container=items))

    @classmethod
    def in_range(
        cls, member: ColumnExpression, start: int = 0, stop: int | None = None, step: int = 1
    ) -> Predicate:
        """Construct a predicate that tests whether an integer column
        expression is part of a strided range.

        Parameters
        ----------
        member : `ColumnExpression`
            Column expression that may be a member of the range.
        start : `int`, optional
            Beginning of the range, inclusive.
        stop : `int` or `None`, optional
            End of the range, exclusive.
        step : `int`, optional
            Offset between values in the range.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the membership test.
        """
        leaf = InRange(member=member, start=start, stop=stop, step=step)
        return cls._from_leaf(leaf)

    @classmethod
    def in_query(cls, member: ColumnExpression, column: ColumnExpression, query_tree: QueryTree) -> Predicate:
        """Construct a predicate that tests whether a column expression is
        present in a single-column projection of a query tree.

        Parameters
        ----------
        member : `ColumnExpression`
            Column expression that may be present in the query.
        column : `ColumnExpression`
            Column to project from the query.
        query_tree : `QueryTree`
            Query tree to select from.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the membership test.
        """
        leaf = InQuery(member=member, column=column, query_tree=query_tree)
        return cls._from_leaf(leaf)

    def gather_required_columns(self, columns: ColumnSet) -> None:
        """Add any columns required to evaluate this predicate to the given
        column set.

        Parameters
        ----------
        columns : `ColumnSet`
            Set of columns to modify in place.
        """
        # Flatten the AND-of-ORs nesting; every leaf contributes its columns.
        for leaf in itertools.chain.from_iterable(self.operands):
            leaf.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        """Add any governor dimensions that need to be fully identified for
        this column expression to be sound.

        Parameters
        ----------
        governors : `set` [ `str` ]
            Set of governor dimension names to modify in place.
        """
        for leaf in itertools.chain.from_iterable(self.operands):
            leaf.gather_governors(governors)

    def logical_and(self, *args: Predicate) -> Predicate:
        """Construct a predicate representing the logical AND of this predicate
        and one or more others.

        Parameters
        ----------
        *args : `Predicate`
            Other predicates.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the logical AND.
        """
        combined = self.operands
        for other in args:
            combined = self._impl_and(combined, other.operands)
        # An empty OR group is False, and False AND anything is False, so
        # collapse the whole expression to the canonical False form.
        if any(not or_group for or_group in combined):
            combined = ((),)
        return Predicate.model_construct(operands=combined)

    def logical_or(self, *args: Predicate) -> Predicate:
        """Construct a predicate representing the logical OR of this predicate
        and one or more others.

        Parameters
        ----------
        *args : `Predicate`
            Other predicates.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the logical OR.
        """
        combined = self.operands
        for other in args:
            combined = self._impl_or(combined, other.operands)
        return Predicate.model_construct(operands=combined)

    def logical_not(self) -> Predicate:
        """Construct a predicate representing the logical NOT of this
        predicate.

        Returns
        -------
        predicate : `Predicate`
            Predicate representing the logical NOT.
        """
        # De Morgan: NOT (AND of OR groups) == OR of (AND of inverted leaves).
        # Start the OR accumulation from False.
        negated: PredicateOperands = ((),)
        for or_group in self.operands:
            # NOT (x OR y OR ...) == (NOT x) AND (NOT y) AND ..., i.e. one
            # single-leaf OR group per inverted leaf.
            inverted_group: PredicateOperands = tuple((leaf.invert(),) for leaf in or_group)
            negated = self._impl_or(negated, inverted_group)
        return Predicate.model_construct(operands=negated)

    def __str__(self) -> str:
        # Parentheses are only needed around a multi-term OR group when it is
        # one of several AND terms.
        needs_parens = len(self.operands) > 1
        rendered = []
        for or_group in self.operands:
            if not or_group:
                rendered.append("False")
            elif len(or_group) == 1:
                rendered.append(str(or_group[0]))
            else:
                joined = " OR ".join(str(operand) for operand in or_group)
                rendered.append(f"({joined})" if needs_parens else joined)
        return " AND ".join(rendered) if rendered else "True"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L]) -> _A:
        """Invoke the visitor interface.

        Parameters
        ----------
        visitor : `PredicateVisitor`
            Visitor to invoke a method on.

        Returns
        -------
        result : `object`
            Forwarded result from the visitor.
        """
        return visitor._visit_logical_and(self.operands)

    @classmethod
    def _from_leaf(cls, leaf: PredicateLeaf) -> Predicate:
        # Wrap a single leaf as a one-element OR group.
        or_group = (leaf,)
        return cls._from_or_group(or_group)

    @classmethod
    def _from_or_group(cls, or_group: tuple[PredicateLeaf, ...]) -> Predicate:
        # Wrap a single OR group as the only AND term.
        and_terms = (or_group,)
        return Predicate.model_construct(operands=and_terms)

    @classmethod
    def _impl_and(cls, a: PredicateOperands, b: PredicateOperands) -> PredicateOperands:
        # We could simplify cases where both sides have some of the same leaf
        # expressions; even using 'is' tests would simplify some cases where
        # converting to conjunctive normal form twice leads to a lot of
        # duplication, e.g. NOT ((A AND B) OR (C AND D)) or any kind of
        # double-negation.  Right now those cases seem pathological enough to
        # be not worth our time.
        if a is b:
            # X AND X is just X.
            return a
        return a + b

    @classmethod
    def _impl_or(cls, a: PredicateOperands, b: PredicateOperands) -> PredicateOperands:
        # Distribute OR over AND: every OR group on the left is merged with
        # every OR group on the right.  The same remark about possible
        # simplification as in _impl_and applies here.
        return tuple(left + right for left, right in itertools.product(a, b))
@final
class LogicalNot(PredicateLeafBase):
    """A boolean column expression that inverts its operand."""

    predicate_type: Literal["not"] = "not"

    operand: LogicalNotOperand
    """Upstream boolean expression to invert."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        # Negation needs exactly what the negated expression needs.
        negated = self.operand
        negated.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        negated = self.operand
        negated.gather_governors(governors)

    def __str__(self) -> str:
        return f"NOT {self.operand}"

    def invert(self) -> LogicalNotOperand:
        # Docstring inherited.
        # Double negation cancels: hand back the wrapped operand instead of
        # nesting another LogicalNot around this one.
        return self.operand

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        negated = self.operand
        return visitor._visit_logical_not(negated, flags)
# Marked final like every other concrete leaf: `PredicateLeafBase` documents
# the leaf hierarchy as closed, and `PredicateLeaf` is a discriminated union
# over these exact classes.
@final
class BooleanWrapper(PredicateLeafBase):
    """Pass-through to a pre-existing boolean column expression."""

    predicate_type: Literal["boolean_wrapper"] = "boolean_wrapper"

    operand: ColumnReference
    """Wrapped expression that will be used as the value for this predicate."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        self.operand.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        self.operand.gather_governors(governors)

    def __str__(self) -> str:
        # Rendered exactly as the wrapped column reference.
        return f"{self.operand}"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        return visitor.visit_boolean_wrapper(self.operand, flags)
@final
class IsNull(PredicateLeafBase):
    """A boolean column expression that tests whether its operand is NULL."""

    predicate_type: Literal["is_null"] = "is_null"

    operand: ColumnExpression
    """Upstream expression to test."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        tested = self.operand
        tested.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        tested = self.operand
        tested.gather_governors(governors)

    def __str__(self) -> str:
        return f"{self.operand} IS NULL"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        tested = self.operand
        return visitor.visit_is_null(tested, flags)
@final
class Comparison(PredicateLeafBase):
    """A boolean column expression formed by comparing two non-boolean
    expressions.
    """

    predicate_type: Literal["comparison"] = "comparison"

    a: ColumnExpression
    """Left-hand side expression for the comparison."""

    b: ColumnExpression
    """Right-hand side expression for the comparison."""

    operator: ComparisonOperator
    """Comparison operator."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        for side in (self.a, self.b):
            side.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        for side in (self.a, self.b):
            side.gather_governors(governors)

    def __str__(self) -> str:
        # Upper-casing only affects the word operators ("overlaps", "glob").
        return f"{self.a} {self.operator.upper()} {self.b}"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        return visitor.visit_comparison(self.a, self.operator, self.b, flags)

    @pydantic.model_validator(mode="after")
    def _validate_column_types(self) -> Comparison:
        # Check that the operator is applicable to the operands' column
        # types.  Mixed-type combinations that are explicitly allowed are
        # handled first; everything after that requires matching types.
        comparison_operators = ("==", "!=", "<", ">", ">=", "<=")

        if self.operator == "overlaps" and (
            is_one_timespan_and_one_datetime(self.a, self.b)
            or is_one_timespan_and_one_ingest_date(self.a, self.b)
        ):
            # Allow timespan OVERLAPS datetime/ingest_date.
            return self

        if is_one_datetime_and_one_ingest_date(self.a, self.b) and self.operator in comparison_operators:
            # ingest_date might be one of two different column types
            # (integer TAI nanoseconds like "datetime", or TIMESTAMP), but
            # either one can be compared with a "datetime" column.
            return self

        if is_numeric(self.a) and is_numeric(self.b) and self.operator in comparison_operators:
            # Allow mixed comparisons between integers and floating points.
            return self

        if self.a.column_type != self.b.column_type:
            raise InvalidQueryError(
                f"Column types for comparison {self} do not agree "
                f"({self.a.column_type}, {self.b.column_type})."
            )

        # Both operands now share one column type; what remains is whether
        # the operator is defined for that type.
        shared_type = self.a.column_type
        if self.operator in ("==", "!="):
            return self
        if self.operator in ("<", ">", ">=", "<=") and shared_type in ("int", "string", "float", "datetime"):
            return self
        if self.operator == "overlaps" and shared_type in ("region", "timespan"):
            return self
        if self.operator == "glob" and shared_type == "string":
            return self
        raise InvalidQueryError(
            f"Invalid column type {self.a.column_type} for operator {self.operator!r}."
        )
@final
class InContainer(PredicateLeafBase):
    """A boolean column expression that tests whether one expression is a
    member of an explicit sequence of other expressions.
    """

    predicate_type: Literal["in_container"] = "in_container"

    member: ColumnExpression
    """Expression to test for membership."""

    container: tuple[ColumnExpression, ...]
    """Expressions representing the elements of the container."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        # The member first, then each container element, in order.
        for expression in (self.member, *self.container):
            expression.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        for expression in (self.member, *self.container):
            expression.gather_governors(governors)

    def __str__(self) -> str:
        return f"{self.member} IN [{', '.join(str(item) for item in self.container)}]"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        return visitor.visit_in_container(self.member, self.container, flags)

    @pydantic.model_validator(mode="after")
    def _validate(self) -> InContainer:
        # Timespans and regions are rejected outright; otherwise every
        # element must have exactly the member's column type.
        if self.member.column_type in ("timespan", "region"):
            raise InvalidQueryError(
                f"Timespan or region column {self.member} may not be used in IN expressions."
            )
        if any(item.column_type != self.member.column_type for item in self.container):
            raise InvalidQueryError(f"Column types for membership test {self} do not agree.")
        return self
@final
class InRange(PredicateLeafBase):
    """A boolean column expression that tests whether its expression is
    included in an integer range.
    """

    predicate_type: Literal["in_range"] = "in_range"

    member: ColumnExpression
    """Expression to test for membership."""

    start: int = 0
    """Inclusive lower bound for the range."""

    stop: int | None = None
    """Exclusive upper bound for the range."""

    step: int = 1
    """Difference between values in the range."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        tested = self.member
        tested.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        tested = self.member
        tested.gather_governors(governors)

    def __str__(self) -> str:
        # Slice-like rendering: a zero start and a missing stop are left
        # blank, and the step is only shown when it is not 1.
        lower = str(self.start) if self.start else ""
        upper = "" if self.stop is None else str(self.stop)
        pieces = [lower, upper]
        if self.step != 1:
            pieces.append(str(self.step))
        return f"{self.member} IN {':'.join(pieces)}"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        return visitor.visit_in_range(self.member, self.start, self.stop, self.step, flags)

    @pydantic.model_validator(mode="after")
    def _validate(self) -> InRange:
        # The member must be an integer column, the step positive, and a
        # given stop must not precede the start.
        if self.member.column_type != "int":
            raise InvalidQueryError(f"Column {self.member} is not an integer.")
        if self.step < 1:
            raise InvalidQueryError("Range step must be >= 1.")
        if self.stop is None:
            return self
        if self.stop < self.start:
            raise InvalidQueryError("Range stop must be >= start.")
        return self
@final
class InQuery(PredicateLeafBase):
    """A boolean column expression that tests whether its expression is
    included in a single-column projection of a relation.

    This is primarily intended to be used on dataset ID columns, but it may
    be useful for other columns as well.
    """

    predicate_type: Literal["in_query"] = "in_query"

    member: ColumnExpression
    """Expression to test for membership."""

    column: ColumnExpression
    """Expression to extract from `query_tree`."""

    query_tree: QueryTree
    """Relation whose rows from `column` represent the container."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        # Only `self.member` belongs to the query tree this predicate is
        # attached to; `self.column` belongs to `self.query_tree` and is
        # deliberately not gathered here.
        outer_expression = self.member
        outer_expression.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        # As above: governors come only from `self.member`, not from
        # `self.column`, which belongs to `self.query_tree`.
        outer_expression = self.member
        outer_expression.gather_governors(governors)

    def __str__(self) -> str:
        return f"{self.member} IN (query).{self.column}"

    def visit(self, visitor: PredicateVisitor[_A, _O, _L], flags: PredicateVisitFlags) -> _L:
        # Docstring inherited.
        return visitor.visit_in_query_tree(self.member, self.column, self.query_tree, flags)

    @pydantic.model_validator(mode="after")
    def _validate_column_types(self) -> InQuery:
        # First check the member/column types themselves...
        if self.member.column_type in ("timespan", "region"):
            raise InvalidQueryError(
                f"Timespan or region column {self.member} may not be used in IN expressions."
            )
        if self.member.column_type != self.column.column_type:
            raise InvalidQueryError(
                f"Column types for membership test {self} do not agree "
                f"({self.member.column_type}, {self.column.column_type})."
            )

        # ...then check that `self.column` can actually be evaluated against
        # `self.query_tree`.  ColumnSet is only imported for type checking at
        # module scope, so the runtime import happens here.
        from ._column_set import ColumnSet

        columns_required_in_tree = ColumnSet(self.query_tree.dimensions)
        self.column.gather_required_columns(columns_required_in_tree)

        # Gathering must not have changed the dimensions away from those of
        # the query tree.
        if columns_required_in_tree.dimensions != self.query_tree.dimensions:
            raise InvalidQueryError(
                f"Column {self.column} requires dimensions {columns_required_in_tree.dimensions}, "
                f"but query tree only has {self.query_tree.dimensions}."
            )

        # Every dataset type the column refers to must be present in the
        # query tree.
        if not columns_required_in_tree.dataset_fields.keys() <= self.query_tree.datasets.keys():
            raise InvalidQueryError(
                f"Column {self.column} requires dataset types "
                f"{set(columns_required_in_tree.dataset_fields.keys())} that are not present in query tree."
            )
        return self
# Union of the leaf types that may appear as the operand of `LogicalNot`:
# every concrete leaf class in this module except `LogicalNot` itself.
LogicalNotOperand: TypeAlias = IsNull | Comparison | InContainer | InRange | InQuery | BooleanWrapper
# Union of all concrete leaf types, discriminated by pydantic on each
# class's ``predicate_type`` field.
PredicateLeaf: TypeAlias = Annotated[
    LogicalNotOperand | LogicalNot, pydantic.Field(discriminator="predicate_type")
]

# Type of `Predicate.operands`: a tuple of tuples of leaves.  `Predicate`
# documents the outer tuple as combined via AND and the inner ones via OR.
PredicateOperands: TypeAlias = tuple[tuple[PredicateLeaf, ...], ...]