Coverage for python/lsst/daf/butler/registry/queries/_query.py : 33%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25import itertools
26from typing import Iterable, Optional, Tuple, Callable
28from sqlalchemy.sql import FromClause
29from sqlalchemy.engine import RowProxy
31from lsst.sphgeom import Region
33from ...core import (
34 DataCoordinate,
35 DatasetRef,
36 DatasetType,
37 Dimension,
38 DimensionGraph,
39)
40from ..interfaces import CollectionManager
41from ._structs import QuerySummary, QueryColumns
class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    collections : `CollectionManager`
        Manager object for collection tables.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, sql: FromClause,
                 summary: QuerySummary,
                 columns: QueryColumns,
                 collections: CollectionManager):
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._collections = collections

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            # Gather all region objects present in this result row.
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Reject the row if any of its regions is disjoint from the
            # WHERE-clause region (explicit None test; Region truthiness is
            # not part of its contract).
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # All regions in the row must also overlap each other pairwise.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDimensionsTuple(self, row: RowProxy, dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        return tuple(row[self._columns.getKeyColumn(dimension)] for dimension in dimensions)

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        return DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base
            class) `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetColumns = self._columns.datasets[datasetType]
        # Map the run collection key stored in the row back to its record so
        # the DatasetRef can carry the run's name rather than its key.
        runRecord = self._collections[row[datasetColumns.runKey]]
        return (DatasetRef(datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name),
                row[datasetColumns.rank] if datasetColumns.rank is not None else None)