Coverage for python/lsst/daf/butler/queries/_base.py: 86%
42 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-05 11:36 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-05 11:36 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("QueryBase", "HomogeneousQueryBase", "CountableQueryBase", "QueryResultsBase")
32from abc import ABC, abstractmethod
33from collections.abc import Iterable, Mapping, Set
34from typing import Any, Self
36from ..dimensions import DataId, DimensionGroup
37from .convert_args import convert_order_by_args, convert_where_args
38from .driver import QueryDriver
39from .expression_factory import ExpressionProxy
40from .tree import OrderExpression, Predicate, QueryTree
43class QueryBase(ABC):
44 """Common base class for `Query` and all `QueryResult` objects.
46 This class should rarely be referenced directly; it is public only because
47 it provides public methods to its subclasses.
48 """
50 @abstractmethod
51 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
52 """Test whether the query would return any rows.
54 Parameters
55 ----------
56 execute : `bool`, optional
57 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
58 determined prior to execution that the query would return no rows.
59 exact : `bool`, optional
60 If `True`, run the full query and perform post-query filtering if
61 needed, until at least one result row is found. If `False`, the
62 returned result does not account for post-query filtering, and
63 hence may be `True` even when all result rows would be filtered
64 out.
66 Returns
67 -------
68 any : `bool`
69 `True` if the query would (or might, depending on arguments) yield
70 result rows. `False` if it definitely would not.
71 """
72 raise NotImplementedError()
74 @abstractmethod
75 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
76 """Return human-readable messages that may help explain why the query
77 yields no results.
79 Parameters
80 ----------
81 execute : `bool`, optional
82 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
83 of aspects of the tree to more precisely determine where rows were
84 filtered out.
86 Returns
87 -------
88 messages : `~collections.abc.Iterable` [ `str` ]
89 String messages that describe reasons the query might not yield any
90 results.
91 """
92 raise NotImplementedError()
94 @abstractmethod
95 def where(
96 self,
97 *args: str | Predicate | DataId,
98 bind: Mapping[str, Any] | None = None,
99 **kwargs: Any,
100 ) -> Self:
101 """Return a query with a boolean-expression filter on its rows.
103 Parameters
104 ----------
105 *args
106 Constraints to apply, combined with logical AND. Arguments may be
107 `str` expressions to parse, `Predicate` objects (these are
108 typically constructed via `expression_factory`) or data IDs.
109 bind : `~collections.abc.Mapping`
110 Mapping from string identifier appearing in a string expression to
111 a literal value that should be substituted for it. This is
112 recommended instead of embedding literals directly into the
113 expression, especially for strings, timespans, or other types where
114 quoting or formatting is nontrivial.
115 **kwargs
116 Data ID key value pairs that extend and override any present in
117 ``*args``.
119 Returns
120 -------
121 query : `QueryBase`
122 A new query object with the given row filters (as well as any
123 already present in ``self``). All row filters are combined with
124 logical AND.
126 Notes
127 -----
128 If an expression references a dimension or dimension element that is
129 not already present in the query, it will be joined in, but dataset
130 searches must already be joined into a query in order to reference
131 their fields in expressions.
133 Data ID values are not checked for consistency; they are extracted from
134 ``args`` and then ``kwargs`` and combined, with later values overriding
135 earlier ones.
136 """
137 raise NotImplementedError()
class HomogeneousQueryBase(QueryBase):
    """Shared base for `Query` and for query-result classes that are
    iterables whose dimensions are the same throughout.

    User code seldom needs to name this class; it is exported only so that
    the public methods it provides are visible on its subclasses.

    Parameters
    ----------
    driver : `QueryDriver`
        Implementation object that knows how to actually execute queries.
    tree : `QueryTree`
        Description of the query as a tree of joins and column expressions.
    """

    def __init__(self, driver: QueryDriver, tree: QueryTree):
        # Both attributes are read by the delegating methods below.
        self._tree = tree
        self._driver = driver

    @property
    def dimensions(self) -> DimensionGroup:
        """All dimensions included in the query's columns."""
        query_tree = self._tree
        return query_tree.dimensions

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        # Pure delegation: the driver receives the tree and both flags.
        driver, query_tree = self._driver, self._tree
        return driver.any(query_tree, execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        # Pure delegation: the driver receives the tree and the flag.
        driver, query_tree = self._driver, self._tree
        return driver.explain_no_results(query_tree, execute=execute)
class CountableQueryBase(QueryBase):
    """Shared base for query result objects that have a well-defined number
    of result rows.

    User code seldom needs to name this class; it is exported only so that
    the public methods it declares are visible on its subclasses.
    """

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Return how many rows this query would yield.

        Parameters
        ----------
        exact : `bool`, optional
            When `True`, run the full query and apply post-query filtering
            as needed so that the count reflects it.  When `False`, the
            returned value may be only an upper bound.
        discard : `bool`, optional
            When `True`, compute the exact count even if that means running
            the full query and throwing the result rows away once they have
            been counted.  When `False`, that situation is an error, since
            the caller would usually do better to execute the query first
            and fetch its rows into a new query (or to pass
            ``exact=False``).  Ignored if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound on
            it if ``exact=False``.
        """
        raise NotImplementedError()
class QueryResultsBase(HomogeneousQueryBase, CountableQueryBase):
    """Shared base for query result objects that have both homogeneous
    dimensions and countable rows.
    """

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query whose results are sorted.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Sort terms.  A string term names a column/dimension to order
            by, and may be prefixed with minus (``-``) to request
            descending order.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier sort terms
        with the new ones.
        """
        # Normalise the user-supplied terms before handing them to _copy.
        sort_terms = convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        return self._copy(self._tree, order_by=sort_terms)

    def limit(self, limit: int | None = None, offset: int = 0) -> Self:
        """Return a new query that takes a positional slice of its rows.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper limit on the number of returned records.  `None` (the
            default) means no limit.
        offset : `int`, optional
            Number of records to skip before returning at most ``limit``
            records.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier slice
        parameters with the new ones.  Slicing always occurs after sorting,
        even if `limit` is called before `order_by`.
        """
        slice_params: dict[str, Any] = {"limit": limit, "offset": offset}
        return self._copy(self._tree, **slice_params)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> Self:
        # Docstring inherited.
        # Convert the mixed positional/keyword constraints first, then apply
        # the result to the existing tree to obtain the filtered tree.
        converted = convert_where_args(self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs)
        filtered_tree = self._tree.where(converted)
        return self._copy(tree=filtered_tree, driver=self._driver)

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return all dataset types included in the query's result rows."""
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Implementations should validate modifications, not assume they are
        correct.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree passed by the caller for the copy.
        **kwargs
            Further modifications.  Methods of this class pass
            ``order_by``, ``limit``, ``offset`` and ``driver``.
        """
        raise NotImplementedError()