Coverage for python/lsst/daf/butler/queries/_base.py: 83%
36 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Public names exported by this module.
__all__ = ("QueryBase", "QueryResultsBase")

from abc import ABC, abstractmethod
from collections.abc import Iterable, Mapping, Set
from typing import Any, Self

from ..dimensions import DataId, DimensionGroup
from .convert_args import convert_order_by_args, convert_where_args
from .driver import QueryDriver
from .expression_factory import ExpressionProxy
from .tree import OrderExpression, Predicate, QueryTree
43class QueryBase(ABC):
44 """Common base class for `Query` and all `QueryResult` objects.
46 This class should rarely be referenced directly; it is public only because
47 it provides public methods to its subclasses.
49 Parameters
50 ----------
51 driver : `QueryDriver`
52 Implementation object that knows how to actually execute queries.
53 tree : `QueryTree`
54 Description of the query as a tree of joins and column expressions.
55 """
57 def __init__(self, driver: QueryDriver, tree: QueryTree):
58 self._driver = driver
59 self._tree = tree
61 @property
62 def dimensions(self) -> DimensionGroup:
63 """All dimensions included in the query's columns."""
64 return self._tree.dimensions
66 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
67 """Test whether the query would return any rows.
69 Parameters
70 ----------
71 execute : `bool`, optional
72 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
73 determined prior to execution that the query would return no rows.
74 exact : `bool`, optional
75 If `True`, run the full query and perform post-query filtering if
76 needed, until at least one result row is found. If `False`, the
77 returned result does not account for post-query filtering, and
78 hence may be `True` even when all result rows would be filtered
79 out.
81 Returns
82 -------
83 any : `bool`
84 `True` if the query would (or might, depending on arguments) yield
85 result rows. `False` if it definitely would not.
86 """
87 return self._driver.any(self._tree, execute=execute, exact=exact)
89 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
90 """Return human-readable messages that may help explain why the query
91 yields no results.
93 Parameters
94 ----------
95 execute : `bool`, optional
96 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
97 of aspects of the tree to more precisely determine where rows were
98 filtered out.
100 Returns
101 -------
102 messages : `~collections.abc.Iterable` [ `str` ]
103 String messages that describe reasons the query might not yield any
104 results.
105 """
106 return self._driver.explain_no_results(self._tree, execute=execute)
108 @abstractmethod
109 def where(
110 self,
111 *args: str | Predicate | DataId,
112 bind: Mapping[str, Any] | None = None,
113 **kwargs: int | str,
114 ) -> Self:
115 """Return a query with a boolean-expression filter on its rows.
117 Parameters
118 ----------
119 *args
120 Constraints to apply, combined with logical AND. Arguments may be
121 `str` expressions to parse, `Predicate` objects (these are
122 typically constructed via `expression_factory`) or data IDs.
123 bind : `~collections.abc.Mapping`
124 Mapping from string identifier appearing in a string expression to
125 a literal value that should be substituted for it. This is
126 recommended instead of embedding literals directly into the
127 expression, especially for strings, timespans, or other types where
128 quoting or formatting is nontrivial.
129 **kwargs
130 Data ID key value pairs that extend and override any present in
131 ``*args``.
133 Returns
134 -------
135 query : `QueryBase`
136 A new query object with the given row filters (as well as any
137 already present in ``self``). All row filters are combined with
138 logical AND.
140 Notes
141 -----
142 If an expression references a dimension or dimension element that is
143 not already present in the query, it will be joined in, but dataset
144 searches must already be joined into a query in order to reference
145 their fields in expressions.
147 Data ID values are not checked for consistency; they are extracted from
148 ``args`` and then ``kwargs`` and combined, with later values overriding
149 earlier ones.
150 """
151 raise NotImplementedError()
class QueryResultsBase(QueryBase):
    """Common base class for query result objects with countable rows."""

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query that yields ordered results.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Names of the columns/dimensions to use for ordering, or expression
            objects of the other accepted types. Column name can be prefixed
            with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new sort terms replace
        the old ones.
        """
        # The tree itself is passed through unchanged; only the converted
        # sort terms are handed to ``_copy`` as a modification.
        return self._copy(
            self._tree, order_by=convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        )

    def limit(self, limit: int | None = None) -> Self:
        """Return a new query that slices its result rows positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper limit on the number of returned records. `None` (default)
            means no limit.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new slice parameters
        replace the old ones. Slicing always occurs after sorting, even if
        `limit` is called before `order_by`.
        """
        return self._copy(self._tree, limit=limit)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        # Docstring inherited.
        # NOTE(review): ``driver`` is forwarded to ``_copy`` as an extra
        # keyword here, whereas ``order_by`` and ``limit`` call ``_copy``
        # without it. Confirm whether ``_copy`` implementations rely on it
        # before changing.
        return self._copy(
            tree=self._tree.where(
                convert_where_args(self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs)
            ),
            driver=self._driver,
        )

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return all dataset types included in the query's result rows."""
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Implementations should validate modifications, not assume they are
        correct.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree for the copy.
        **kwargs
            Modifications to apply to the copy. The methods of this class
            pass ``order_by``, ``limit``, and ``driver``.

        Returns
        -------
        copy : `QueryResultsBase`
            A new object of the same type as ``self``.
        """
        raise NotImplementedError()