Coverage for python/lsst/daf/butler/queries/driver.py: 82% (66 statements), coverage.py v7.13.5, created at 2026-04-18 08:43 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateResultPage",
    "DatasetRefResultPage",
    "DimensionRecordResultPage",
    "GeneralResultPage",
    "QueryDriver",
    "ResultPage",
)

import dataclasses
from abc import abstractmethod
from collections.abc import Iterable, Iterator
from contextlib import AbstractContextManager
from typing import Any, TypeAlias, Union, overload

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from ..dimensions import (
    DataCoordinate,
    DataIdValue,
    DimensionElement,
    DimensionGroup,
    DimensionRecord,
    DimensionRecordSet,
    DimensionRecordTable,
    DimensionUniverse,
)
from .result_specs import (
    DataCoordinateResultSpec,
    DatasetRefResultSpec,
    DimensionRecordResultSpec,
    GeneralResultSpec,
    ResultSpec,
)
from .tree import DataCoordinateUploadKey, MaterializationKey, QueryTree

# The Page types below could become Pydantic models instead of dataclasses if
# that makes them more directly usable by RemoteButler (at least once we have
# Pydantic-friendly containers for all of them). We may want to add a
# discriminator annotation to the ResultPage union if we do that.


@dataclasses.dataclass
class DataCoordinateResultPage:
    """A single page of results from a data coordinate query."""

    spec: DataCoordinateResultSpec

    # TODO: On DM-41114 this will become a custom container that normalizes out
    # attached DimensionRecords and is Pydantic-friendly.
    rows: list[DataCoordinate]


@dataclasses.dataclass
class DimensionRecordResultPage:
    """A single page of results from a dimension record query."""

    spec: DimensionRecordResultSpec
    rows: Iterable[DimensionRecord]

    def as_table(self) -> DimensionRecordTable:
        """Return the rows as a `DimensionRecordTable`, avoiding a copy when
        they are already stored in one.
        """
        if isinstance(self.rows, DimensionRecordTable):
            return self.rows
        else:
            return DimensionRecordTable(self.spec.element, self.rows)

    def as_set(self) -> DimensionRecordSet:
        """Return the rows as a `DimensionRecordSet`, avoiding a copy when
        they are already stored in one.
        """
        if isinstance(self.rows, DimensionRecordSet):
            return self.rows
        else:
            return DimensionRecordSet(self.spec.element, self.rows)


@dataclasses.dataclass
class DatasetRefResultPage:
    """A single page of results from a dataset query."""

    spec: DatasetRefResultSpec

    # TODO: On DM-41115 this will become a custom container that normalizes out
    # attached DimensionRecords and is Pydantic-friendly.
    rows: list[DatasetRef]


@dataclasses.dataclass
class GeneralResultPage:
    """A single page of results from a general query."""

    spec: GeneralResultSpec

    # Raw tabular data, with columns in the same order as
    # spec.get_result_columns().
    rows: list[tuple[Any, ...]]

    # This map contains dimension records for cached and skypix elements,
    # and only when spec.include_dimension_records is True.
    dimension_records: dict[DimensionElement, DimensionRecordSet] | None


ResultPage: TypeAlias = Union[
    DataCoordinateResultPage, DimensionRecordResultPage, DatasetRefResultPage, GeneralResultPage
]


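# A hedged illustration, not part of the original module: ``ResultPage`` is a
# plain union with no discriminator field, so consumers narrow a page back to
# its concrete type with ``isinstance``.  The helper name below is invented
# for this sketch.
def _describe_page_example(page: ResultPage) -> str:
    """Return a short, human-readable label for a result page (sketch only)."""
    if isinstance(page, DimensionRecordResultPage):
        # ``spec.element`` is part of the real API; ``as_table`` and
        # ``as_set`` above rely on it.
        return f"dimension records for {page.spec.element}"
    if isinstance(page, GeneralResultPage):
        # General-result rows are plain tuples, so ``len`` is safe here.
        return f"general results with {len(page.rows)} rows"
    return type(page).__name__

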
class QueryDriver(AbstractContextManager[None]):
    """Base class for the implementation object inside `Query` objects
    that is specialized for DirectButler vs. RemoteButler.

    Notes
    -----
    Implementations should be context managers. This allows them to manage the
    lifetime of server-side state, such as:

    - a SQL transaction, when necessary (DirectButler);
    - SQL cursors for queries that were not fully iterated over (DirectButler);
    - temporary database tables (DirectButler);
    - result-page Parquet files that were never fetched (RemoteButler);
    - uploaded Parquet files used to fill temporary database tables
      (RemoteButler);
    - cached content needed to construct query trees, like collection summaries
      (potentially all Butlers).

    When possible, these resources should be cleaned up as soon as they are no
    longer needed, and the Butler server must still guard against the context
    manager's ``__exit__`` signal never reaching it; even so, a context manager
    cleans up far more reliably than relying on garbage collection and
    ``__del__`` would.
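
    Examples
    --------
    A sketch of typical use, not taken from the original module: it assumes a
    concrete ``driver`` implementation plus ``spec`` and ``tree`` objects
    built elsewhere (all illustrative names)::

        with driver:
            for page in driver.execute(spec, tree):
                ...  # consume page.rows before the context closes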
157 """
159 @property
160 @abstractmethod
161 def universe(self) -> DimensionUniverse:
162 """Object that defines all dimensions."""
163 raise NotImplementedError()
    @overload
    def execute(
        self, result_spec: DataCoordinateResultSpec, tree: QueryTree
    ) -> Iterator[DataCoordinateResultPage]: ...

    @overload
    def execute(
        self, result_spec: DimensionRecordResultSpec, tree: QueryTree
    ) -> Iterator[DimensionRecordResultPage]: ...

    @overload
    def execute(
        self, result_spec: DatasetRefResultSpec, tree: QueryTree
    ) -> Iterator[DatasetRefResultPage]: ...

    @overload
    def execute(self, result_spec: GeneralResultSpec, tree: QueryTree) -> Iterator[GeneralResultPage]: ...

    @abstractmethod
    def execute(self, result_spec: ResultSpec, tree: QueryTree) -> Iterator[ResultPage]:
        """Execute a query, yielding pages of its results.

        Parameters
        ----------
        result_spec : `ResultSpec`
            The kind of results the user wants from the query. This can affect
            the actual query (i.e. SQL and Python postprocessing) that is run,
            e.g. by changing what is in the SQL SELECT clause and even what
            tables are joined in, but it never changes the number or order of
            result rows.
        tree : `QueryTree`
            Query tree to evaluate.

        Yields
        ------
        page : `ResultPage`
            A page whose type corresponds to the type of ``result_spec``, with
            rows from the query.
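
        Examples
        --------
        A sketch of consuming pages, not from the original module; ``driver``,
        ``spec``, and ``tree`` are assumed to exist (illustrative names)::

            with driver:
                for page in driver.execute(spec, tree):
                    for row in page.rows:
                        ...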
203 """
204 raise NotImplementedError()
    @abstractmethod
    def materialize(
        self,
        tree: QueryTree,
        dimensions: DimensionGroup,
        datasets: frozenset[str],
        allow_duplicate_overlaps: bool = False,
    ) -> MaterializationKey:
        """Execute a query tree, saving results to temporary storage for use
        in later queries.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree to evaluate.
        dimensions : `DimensionGroup`
            Dimensions whose key columns should be preserved.
        datasets : `frozenset` [ `str` ]
            Names of dataset types whose ID columns may be materialized. It
            is implementation-defined whether they actually are.
        allow_duplicate_overlaps : `bool`, optional
            If `True`, the query will be allowed to generate non-distinct
            rows for spatial overlaps.

        Returns
        -------
        key : `MaterializationKey`
            Unique identifier for the result rows that allows them to be
            referenced in a `QueryTree`.
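
        Examples
        --------
        A sketch, not from the original module, assuming ``driver``, ``tree``,
        and ``dimensions`` built elsewhere (illustrative names)::

            key = driver.materialize(tree, dimensions, frozenset())
            # ``key`` can now be referenced when building a later QueryTree.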
235 """
236 raise NotImplementedError()
    @abstractmethod
    def upload_data_coordinates(
        self, dimensions: DimensionGroup, rows: Iterable[tuple[DataIdValue, ...]]
    ) -> DataCoordinateUploadKey:
        """Upload a table of data coordinates for use in later queries.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of the data coordinates.
        rows : `~collections.abc.Iterable` [ `tuple` ]
            Tuples of data coordinate values, covering just the "required"
            subset of ``dimensions``.

        Returns
        -------
        key : `DataCoordinateUploadKey`
            Unique identifier for the upload that allows it to be referenced in
            a `QueryTree`.
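
        Examples
        --------
        A sketch, not from the original module; ``driver`` is assumed, and the
        dimension names and data ID values are illustrative::

            key = driver.upload_data_coordinates(
                driver.universe.conform(["instrument", "exposure"]),
                [("HSC", 903334), ("HSC", 903336)],
            )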
257 """
258 raise NotImplementedError()
    @abstractmethod
    def count(
        self,
        tree: QueryTree,
        result_spec: ResultSpec,
        *,
        exact: bool,
        discard: bool,
    ) -> int:
        """Return the number of rows a query would return.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree to evaluate.
        result_spec : `ResultSpec`
            The kind of results the user wants to count.
        exact : `bool`
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`
            If `True`, compute the exact count even if doing so requires
            running the full query and then throwing away the result rows
            after counting them. If `False`, raise an error in that case, as
            the user would usually be better off executing the query first to
            fetch its rows into a new query (or passing ``exact=False``).
            Ignored if ``exact=False``.
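
        Examples
        --------
        A sketch of requesting a cheap upper bound, not from the original
        module; ``driver``, ``tree``, and ``spec`` are assumed::

            n = driver.count(tree, spec, exact=False, discard=False)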
288 """
289 raise NotImplementedError()
    @abstractmethod
    def any(self, tree: QueryTree, *, execute: bool, exact: bool) -> bool:
        """Test whether the query would return any rows.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree to evaluate.
        execute : `bool`
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
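
        Examples
        --------
        A sketch of a cheap emptiness check, not from the original module;
        ``driver`` and ``tree`` are assumed::

            if not driver.any(tree, execute=True, exact=False):
                print("query is certain to return no rows")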
314 """
315 raise NotImplementedError()
    @abstractmethod
    def explain_no_results(self, tree: QueryTree, execute: bool) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree to evaluate.
        execute : `bool`
            If `True`, execute simplified versions (e.g. ``LIMIT 1``) of
            aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
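
        Examples
        --------
        A sketch, not from the original module; ``driver`` and ``tree`` are
        assumed::

            for message in driver.explain_no_results(tree, execute=True):
                print(message)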
336 """
337 raise NotImplementedError()
    @abstractmethod
    def get_default_collections(self) -> tuple[str, ...]:
        """Return the default collection search path.

        Returns
        -------
        collections : `tuple` [ `str`, ... ]
            The default collection search path as a tuple of `str`.

        Raises
        ------
        NoDefaultCollectionError
            Raised if there are no default collections.
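
        Examples
        --------
        A sketch, not from the original module; ``driver`` is assumed, and
        ``NoDefaultCollectionError`` is the exception named above::

            try:
                collections = driver.get_default_collections()
            except NoDefaultCollectionError:
                collections = ()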
352 """
353 raise NotImplementedError()
    @abstractmethod
    def get_dataset_type(self, name: str) -> DatasetType:
        """Return the named dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        dataset_type : `DatasetType`
            The registered dataset type.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the dataset type is not registered.
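
        Examples
        --------
        A sketch, not from the original module; ``driver`` and a registered
        "raw" dataset type are assumed::

            dataset_type = driver.get_dataset_type("raw")
            dimensions = dataset_type.dimensions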
373 """
374 raise NotImplementedError()