Coverage for python/lsst/daf/butler/registry/queries/_query.py : 31%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25import itertools
26from typing import Optional, Tuple, Callable
28from sqlalchemy.sql import FromClause
29from sqlalchemy.engine import RowProxy
31from lsst.sphgeom import Region
33from ...core import (
34 DataCoordinate,
35 DatasetRef,
36 DatasetType,
37 DimensionGraph,
38)
39from ..interfaces import CollectionManager
40from ._structs import QuerySummary, QueryColumns
class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed by a call to
    `QueryBuilder.finish`; constructing one directly makes it difficult to
    maintain the invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    collections : `CollectionManager`
        Manager object for collection tables.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, sql: FromClause,
                 summary: QuerySummary,
                 columns: QueryColumns,
                 collections: CollectionManager):
        # ``summary`` and ``sql`` are public attributes; the column and
        # collection helpers are implementation details.
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._collections = collections

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.  It returns `False` if any region in
            the row is disjoint from the constraint region, or if any two
            regions in the row are disjoint from each other.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Compare against `None` explicitly: only a missing region means
            # "no constraint"; the region's truthiness must not matter.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # All regions in a single row must also be mutually overlapping.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetColumns = self._columns.datasets[datasetType]
        # The row stores the run as a collection key; the collection manager
        # maps it back to a record from which the run name is taken.
        runRecord = self._collections[row[datasetColumns.runKey]]
        return (DatasetRef(datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name),
                row[datasetColumns.rank] if datasetColumns.rank is not None else None)