Coverage for python/lsst/daf/butler/queries/_base.py: 86%
37 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("QueryBase", "QueryResultsBase", "ValidityRangeMatchError")
32from abc import ABC, abstractmethod
33from collections.abc import Iterable, Mapping, Set
34from typing import Any, Self
36from ..dimensions import DataId, DimensionGroup
37from .convert_args import convert_order_by_args, convert_where_args
38from .driver import QueryDriver
39from .expression_factory import ExpressionProxy
40from .tree import OrderExpression, Predicate, QueryTree
class ValidityRangeMatchError(RuntimeError):
    """Error signalling that a find-first calibration dataset query could not
    fully resolve validity ranges.

    A find-first query that involves a calibration dataset only works when
    every result row corresponds to exactly one calibration dataset.  That
    requires either that the query's result rows include a temporal
    dimension, or that the query is constrained temporally.  This exception
    can be raised when those dimensions or constraints are missing, or when
    the timespan of a temporal dimension overlaps more than one validity
    range (e.g. the recommended bias changes in the middle of an exposure).
    """
57class QueryBase(ABC):
58 """Common base class for `Query` and all `QueryResult` objects.
60 This class should rarely be referenced directly; it is public only because
61 it provides public methods to its subclasses.
63 Parameters
64 ----------
65 driver : `QueryDriver`
66 Implementation object that knows how to actually execute queries.
67 tree : `QueryTree`
68 Description of the query as a tree of joins and column expressions.
69 """
71 def __init__(self, driver: QueryDriver, tree: QueryTree):
72 self._driver = driver
73 self._tree = tree
75 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
76 """Test whether the query would return any rows.
78 Parameters
79 ----------
80 execute : `bool`, optional
81 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
82 determined prior to execution that the query would return no rows.
83 exact : `bool`, optional
84 If `True`, run the full query and perform post-query filtering if
85 needed, until at least one result row is found. If `False`, the
86 returned result does not account for post-query filtering, and
87 hence may be `True` even when all result rows would be filtered
88 out.
90 Returns
91 -------
92 any : `bool`
93 `True` if the query would (or might, depending on arguments) yield
94 result rows. `False` if it definitely would not.
95 """
96 return self._driver.any(self._tree, execute=execute, exact=exact)
98 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
99 """Return human-readable messages that may help explain why the query
100 yields no results.
102 Parameters
103 ----------
104 execute : `bool`, optional
105 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
106 of aspects of the tree to more precisely determine where rows were
107 filtered out.
109 Returns
110 -------
111 messages : `~collections.abc.Iterable` [ `str` ]
112 String messages that describe reasons the query might not yield any
113 results.
114 """
115 return self._driver.explain_no_results(self._tree, execute=execute)
117 @abstractmethod
118 def where(
119 self,
120 *args: str | Predicate | DataId,
121 bind: Mapping[str, Any] | None = None,
122 **kwargs: int | str,
123 ) -> Self:
124 """Return a query with a boolean-expression filter on its rows.
126 Parameters
127 ----------
128 *args
129 Constraints to apply, combined with logical AND. Arguments may be
130 `str` expressions to parse, `Predicate` objects (these are
131 typically constructed via `expression_factory`) or data IDs.
132 bind : `~collections.abc.Mapping`
133 Mapping from string identifier appearing in a string expression to
134 a literal value that should be substituted for it. This is
135 recommended instead of embedding literals directly into the
136 expression, especially for strings, timespans, or other types where
137 quoting or formatting is nontrivial.
138 **kwargs
139 Data ID key value pairs that extend and override any present in
140 ``*args``.
142 Returns
143 -------
144 query : `QueryBase`
145 A new query object with the given row filters (as well as any
146 already present in ``self``). All row filters are combined with
147 logical AND.
149 Notes
150 -----
151 If an expression references a dimension or dimension element that is
152 not already present in the query, it will be joined in, but dataset
153 searches must already be joined into a query in order to reference
154 their fields in expressions.
156 Data ID values are not checked for consistency; they are extracted from
157 ``args`` and then ``kwargs`` and combined, with later values overriding
158 earlier ones.
159 """
160 raise NotImplementedError()
class QueryResultsBase(QueryBase):
    """Shared base class for query result objects whose rows can be
    counted.
    """

    @property
    @abstractmethod
    def dimensions(self) -> DimensionGroup:
        """All dimensions included in the query's columns."""
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Return the number of rows this query would produce.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering
            if needed, so that the count accounts for that filtering.  If
            `False`, the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if that requires running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query that yields its results in sorted order.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Columns/dimensions to order by.  A column name given as a string
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new sort terms replace
        the old ones.
        """
        # Normalize the user-facing arguments first, then hand them to
        # `_copy` alongside the unchanged tree.
        sort_terms = convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        return self._copy(self._tree, order_by=sort_terms)

    def limit(self, limit: int | None = None) -> Self:
        """Return a new query that slices its result rows positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper limit on the number of returned records.  `None` (default)
            means no limit.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new slice parameters
        replace the old ones.  Slicing always occurs after sorting, even if
        `limit` is called before `order_by`.
        """
        # The tree itself is unchanged; only the ``limit`` keyword is
        # forwarded to `_copy`.
        unchanged_tree = self._tree
        return self._copy(unchanged_tree, limit=limit)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        # Docstring inherited.
        # Convert the mixed positional/keyword constraints, apply the result
        # to the tree, and build the copy around the filtered tree.
        predicate = convert_where_args(self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs)
        filtered_tree = self._tree.where(predicate)
        # NOTE(review): unlike `order_by` and `limit`, this call also passes
        # ``driver`` through `_copy`'s ``**kwargs`` -- confirm that every
        # `_copy` implementation expects that keyword.
        return self._copy(tree=filtered_tree, driver=self._driver)

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return all dataset types included in the query's result rows."""
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Implementations should validate modifications, not assume they are
        correct.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree for the copy.
        **kwargs
            Additional modifications to apply; within this class the methods
            pass ``order_by``, ``limit``, and ``driver``.
        """
        raise NotImplementedError()