Coverage for python/lsst/daf/butler/registry/queries/_structs.py : 37%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["QuerySummary"] # other classes here are local to subpackage
25from dataclasses import dataclass
26from typing import Iterator, List, Optional, Union
28from sqlalchemy.sql import ColumnElement
30from ...core import (
31 DatasetType,
32 Dimension,
33 DimensionElement,
34 DimensionGraph,
35 DimensionUniverse,
36 ExpandedDataCoordinate,
37 NamedKeyDict,
38 NamedValueSet,
39 SkyPixDimension,
40 Timespan,
41)
42# We're not trying to add type annotations to the lex/yacc parser code, so
43# MyPy doesn't know about some of these imports.
44from .exprParser import Node, ParserYacc # type: ignore
@dataclass
class QueryWhereExpression:
    """Holder for the result of parsing a user-supplied WHERE expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions; handed to the visitor that inspects the parsed
        tree.
    expression : `str`, optional
        The string expression to parse.  If it is `None` or empty, no parsing
        happens and the struct is populated with empty containers.

    Raises
    ------
    RuntimeError
        Raised (chained to the underlying exception) if constructing the
        parser or parsing ``expression`` fails.
    """

    tree: Optional[Node]
    """Root of the parsed expression tree, or `None` when no expression was
    given (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """Dimensions whose keys the expression refers to
    (`NamedValueSet` of `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, List[str]]
    """Metadata fields the expression refers to, grouped by dimension element
    (`NamedKeyDict` mapping `DimensionElement` to a collection of field names;
    annotated here as a `list` of `str`).
    """

    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        # Nothing to parse: fill in the "empty" state and stop.
        if not expression:
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()
            return
        # NOTE(review): imported lazily, presumably to avoid an import cycle
        # within the subpackage - confirm before moving to module scope.
        from .expressions import InspectionVisitor
        try:
            self.tree = ParserYacc().parse(expression)
        except Exception as err:
            raise RuntimeError(f"Failed to parse user expression `{expression}'.") from err
        inspector = InspectionVisitor(universe)
        # A truthy expression is expected to always yield a tree.
        assert self.tree is not None
        self.tree.visit(inspector)
        self.keys = inspector.keys
        self.metadata = inspector.metadata
@dataclass
class QuerySummary:
    """Struct that stores and classifies the dimensions taking part in a
    query.

    Building a `QueryBuilder` requires a `QuerySummary`, and the summary must
    cover every dimension that will appear in the query (including those
    needed only for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        Dimensions whose primary keys should appear in the query's result
        rows.
    dataId : `ExpandedDataCoordinate`, optional
        Fully-expanded data ID for dimensions known up front.  When omitted,
        an empty data ID is created from ``requested.universe``.
    expression : `str` or `QueryWhereExpression`, optional
        User-provided WHERE expression.  Anything that is not already a
        `QueryWhereExpression` (including `None`) is wrapped in one.
    """

    requested: DimensionGraph
    """Dimensions whose primary keys should appear in the query's result rows
    (`DimensionGraph`).
    """

    dataId: ExpandedDataCoordinate
    """Data ID for the dimensions known before the query is constructed
    (`ExpandedDataCoordinate`).
    """

    expression: QueryWhereExpression
    """Details of the parsed user WHERE expression, if any
    (`QueryWhereExpression`).
    """

    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[ExpandedDataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None):
        self.requested = requested
        if dataId is None:
            # No data ID given: substitute an empty one.
            dataId = ExpandedDataCoordinate(requested.universe.empty, (), records=NamedKeyDict())
        self.dataId = dataId
        if not isinstance(expression, QueryWhereExpression):
            # Strings and `None` both go through the parsing wrapper.
            expression = QueryWhereExpression(requested.universe, expression)
        self.expression = expression

    @property
    def universe(self) -> DimensionUniverse:
        """The universe of all known dimensions, taken from ``requested``
        (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the spatial elements of `mustHaveKeysJoined` that are
        # not already among the elements of the given data ID.
        candidates = NamedValueSet(self.mustHaveKeysJoined.spatial - self.dataId.graph.elements)
        count = len(candidates)
        if count == 0:
            return candidates
        if count == 1:
            # A single candidate means no spatial join, but a WHERE filter on
            # a region given via the data ID is still possible.
            if not self.dataId.graph.spatial:
                # Neither a spatial join nor a spatial filter: this query
                # needs no spatial information at all.
                return NamedValueSet()
            (only,) = candidates
            if isinstance(only, SkyPixDimension):
                # Such filters can be applied directly to a SkyPix dimension.
                return candidates
            # Otherwise fall through: the common SkyPix dimension is added and
            # the lone element is joined to it.
        # Either a spatial join (several candidates) or a filter on a
        # non-SkyPix element; both need the common SkyPix system.
        candidates.add(self.universe.commonSkyPix)
        return candidates

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the query
        (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the temporal elements of `mustHaveKeysJoined` that
        # are not already among the elements of the given data ID.
        candidates = NamedValueSet(self.mustHaveKeysJoined.temporal - self.dataId.graph.elements)
        if len(candidates) != 1 or self.dataId.graph.temporal:
            return candidates
        # Exactly one candidate and nothing temporal in the data ID: there is
        # no temporal join or filter, so no timespans are needed.
        return NamedValueSet()

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the query's JOIN ON
        clauses, even when their own tables are absent (`DimensionGraph`).

        The primary key of a `Dimension` can take part in a join clause
        without its table, through a foreign key column in the table of a
        dependent dimension element or dataset.
        """
        requestedNames = self.requested.names
        expressionNames = self.expression.keys.names
        return DimensionGraph(self.universe, names=set(requestedNames | expressionNames))

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose tables must appear in the query's FROM
        clause (`NamedValueSet` of `DimensionElement`).
        """
        # Start from everything needed for spatial/temporal handling plus any
        # element whose metadata the WHERE expression refers to.
        tables = NamedValueSet(self.spatial | self.temporal | self.expression.metadata.keys())
        # Add every key-joined dimension whose `implied` attribute is truthy.
        for dimension in self.mustHaveKeysJoined:
            if not dimension.implied:
                continue
            tables.add(dimension)
        # Add every element flagged `alwaysJoin`, considering the data ID's
        # dimensions as well as the key-joined ones.
        for element in self.mustHaveKeysJoined.union(self.dataId.graph).elements:
            if not element.alwaysJoin:
                continue
            tables.add(element)
        return tables
@dataclass
class DatasetQueryColumns:
    """Struct grouping the query columns needed to rebuild `DatasetRef`
    instances from query results.

    Iterating over an instance yields ``id``, then ``runKey``, then ``rank``
    if (and only if) ``rank`` is not `None`.
    """

    id: ColumnElement
    """Column holding the unique integer ID of the dataset.
    """

    runKey: ColumnElement
    """Foreign key column pointing at the `~CollectionType.RUN` collection
    that holds the dataset.
    """

    rank: Optional[ColumnElement] = None
    """Column holding the index, within the ordered sequence of given
    collections, of the collection in which the dataset was found.
    """

    def __iter__(self) -> Iterator[ColumnElement]:
        """Iterate over the columns that are set, in declaration order."""
        yield self.id
        yield self.runKey
        rankColumn = self.rank
        if rankColumn is None:
            return
        yield rankColumn
@dataclass
class QueryColumns:
    """Struct that organizes the columns of a query that is being built or
    is currently executing.

    Construction takes no arguments: every container attribute starts out as
    an empty `NamedKeyDict` and is expected to be filled in incrementally.
    """

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Primary key columns for dimensions (`NamedKeyDict` mapping `Dimension`
    to a `list` of `ColumnElement`).

    Every list holds columns from several tables that all correspond to the
    same dimension; the query should constrain them to have equal values.

    In a `Query`, the keys of this dictionary must include at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.dataId.graph`.
    """

    timespans: NamedKeyDict[DimensionElement, Timespan[ColumnElement]]
    """Timespan columns for elements taking part in a temporal join or filter
    in the query (`NamedKeyDict` mapping `DimensionElement` to `Timespan` of
    `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Region columns for elements taking part in a spatial join or filter in
    the query (`NamedKeyDict` mapping `DimensionElement` to `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: NamedKeyDict[DatasetType, DatasetQueryColumns]
    """Columns from which `DatasetRef` instances can be built out of query
    results, one entry per `DatasetType` included in the query
    (`NamedKeyDict` [ `DatasetType`, `DatasetQueryColumns` ] ).
    """

    def __init__(self) -> None:
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = NamedKeyDict()

    def getKeyColumn(self, dimension: Dimension) -> ColumnElement:
        """Return one of the columns in ``self.keys`` for a dimension.

        Which column is picked is an implementation detail, but the choice is
        deterministic and consistent across multiple calls.

        Parameters
        ----------
        dimension : `Dimension`
            Element for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.
        """
        # The last entry is taken purely for the benefit of people reading
        # the generated query (e.g. while debugging): it makes it more likely
        # that the key comes from the dimension's own table or, failing that,
        # from a closely related dimension, rather than from something more
        # surprising such as a dataset subquery.  For the database the choice
        # is arbitrary, because the query guarantees all of these columns
        # have equal values.
        candidates = self.keys[dimension]
        return candidates[-1]