Coverage for python/lsst/daf/butler/registry/queries/_query.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25import itertools
26from typing import Optional, Dict, Any, Tuple, Callable
28from sqlalchemy.sql import FromClause
29from sqlalchemy.engine import RowProxy, ResultProxy, Connection
31from lsst.sphgeom import Region
33from ...core import (
34 DataCoordinate,
35 DatasetRef,
36 DatasetType,
37 DimensionGraph,
38 ExpandedDataCoordinate,
39)
40from ._structs import QuerySummary, QueryColumns, QueryParameters
class Query:
    """A wrapper for a SQLAlchemy query that knows how to re-bind parameters
    and transform result rows into data IDs and dataset references.

    A `Query` should almost always be constructed by a call to
    `QueryBuilder.finish`; direct construction makes it difficult to maintain
    the invariants between arguments (see the documentation for
    `QueryColumns` and `QueryParameters` for more information).

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to execute the query.
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    parameters : `QueryParameters`
        Bind parameters for the query.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, connection: Connection, sql: FromClause,
                 summary: QuerySummary, columns: QueryColumns, parameters: QueryParameters):
        # ``summary`` and ``sql`` are public attributes; the remaining
        # arguments are kept private and only used by the methods below.
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._parameters = parameters
        self._connection = connection

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Compare against `None` explicitly: whether a region was supplied
            # must not depend on how the region object evaluates as a boolean.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must overlap as well.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def bind(self, dataId: ExpandedDataCoordinate) -> Dict[str, Any]:
        """Return a dictionary that can be passed to a SQLAlchemy execute
        method to provide WHERE clause information at execution time rather
        than construction time.

        Most callers should call `Query.execute` directly instead; when called
        with a data ID, that calls `bind` internally.

        Parameters
        ----------
        dataId : `ExpandedDataCoordinate`
            Data ID to transform into bind parameters.  This must identify
            all dimensions in `QuerySummary.given`, and must have the same
            primary key values for all dimensions also identified by
            `QuerySummary.dataId`.

        Returns
        -------
        parameters : `dict`
            Dictionary that can be passed as the second argument (with
            ``self.sql`` as the first argument) to SQLAlchemy execute methods.

        Notes
        -----
        Calling `bind` does not automatically update the callable returned by
        `predicate` with the given data ID's region (if it has one).  That
        must be done manually by passing the region when calling `predicate`.
        """
        assert dataId.graph == self.summary.given, (
            f"Data ID dimensions {dataId.graph} do not match the query's given "
            f"dimensions {self.summary.given}."
        )
        result = {}
        # Primary-key values for each dimension with a key bind parameter.
        for dimension, parameter in self._parameters.keys.items():
            result[parameter] = dataId.full[dimension]
        # Timespan bounds, only if the query has timespan bind parameters.
        if self._parameters.timespan:
            result[self._parameters.timespan.begin] = dataId.timespan.begin
            result[self._parameters.timespan.end] = dataId.timespan.end
        # Envelope of the data ID's region in each skypix pixelization.
        for dimension, parameter in self._parameters.skypix.items():
            result[parameter] = dimension.pixelization.envelope(dataId.region)
        return result

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetIdColumn, datasetRankColumn = self._columns.datasets[datasetType]
        # The rank column is `None` when no rank was requested for this
        # dataset type, in which case no rank is read from the row.
        rank = row[datasetRankColumn] if datasetRankColumn is not None else None
        return DatasetRef(datasetType, dataId, id=row[datasetIdColumn]), rank

    def execute(self, dataId: Optional[ExpandedDataCoordinate] = None) -> ResultProxy:
        """Execute the query.

        This may be called multiple times with different arguments to apply
        different bind parameter values without repeating the work of
        constructing the query.

        Parameters
        ----------
        dataId : `ExpandedDataCoordinate`, optional
            Data ID to transform into bind parameters.  This must identify
            all dimensions in `QuerySummary.given`, and must have the same
            primary key values for all dimensions also identified by
            `QuerySummary.dataId`.  If not provided, `QuerySummary.dataId`
            must identify all dimensions in `QuerySummary.given`.

        Returns
        -------
        results : `sqlalchemy.engine.ResultProxy`
            Object representing the query results; see SQLAlchemy documentation
            for more information.
        """
        if dataId is None:
            # No runtime bind parameters: execute the query as constructed.
            return self._connection.execute(self.sql)
        return self._connection.execute(self.sql, self.bind(dataId))