Coverage for python/lsst/daf/butler/registry/queries/_query.py : 31%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25import itertools
26from typing import Optional, Tuple, Callable
28from sqlalchemy.sql import FromClause
29from sqlalchemy.engine import RowProxy, ResultProxy, Connection
31from lsst.sphgeom import Region
33from ...core import (
34 DataCoordinate,
35 DatasetRef,
36 DatasetType,
37 DimensionGraph,
38)
39from ._structs import QuerySummary, QueryColumns
class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to execute the query.
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, connection: Connection, sql: FromClause,
                 summary: QuerySummary, columns: QueryColumns):
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._connection = connection

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.  It returns `False` if any region in
            the row is disjoint from the filter region (when there is one) or
            from any other region in the same row, and `True` otherwise.

        Notes
        -----
        The filter region and the set of region columns are both captured
        once, when this method is called, rather than once per row.
        """
        whereRegion = region if region is not None else self.summary.dataId.region
        # The columns to inspect do not depend on the row, so look them up a
        # single time instead of on every invocation of the closure.
        regionColumns = tuple(self._columns.regions.values())

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in regionColumns]
            # Compare against `None` explicitly: whether a filter region was
            # supplied must not depend on the truthiness of the region object.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # All regions within a single row must also mutually overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetIdColumn, datasetRankColumn = self._columns.datasets[datasetType]
        # The rank column is absent (`None`) when no rank was requested.
        return (DatasetRef(datasetType, dataId, id=row[datasetIdColumn]),
                row[datasetRankColumn] if datasetRankColumn is not None else None)

    def execute(self) -> ResultProxy:
        """Execute the query.

        Returns
        -------
        results : `sqlalchemy.engine.ResultProxy`
            Object representing the query results; see SQLAlchemy documentation
            for more information.
        """
        return self._connection.execute(self.sql)