Coverage for python/lsst/daf/butler/registry/queries/_structs.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["QuerySummary"]  # other classes here are local to subpackage

from dataclasses import dataclass
from typing import Optional, Tuple, List, Set, Union

from sqlalchemy.sql import ColumnElement

from ...core import (
    DatasetType,
    Dimension,
    DimensionElement,
    DimensionGraph,
    DimensionUniverse,
    ExpandedDataCoordinate,
    SkyPixDimension,
    Timespan,
)
from ...core.utils import NamedValueSet, NamedKeyDict
from .exprParser import Node, ParserYacc


@dataclass
class QueryWhereExpression:
    """A struct representing a parsed user-provided WHERE expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    expression : `str`, optional
        The string expression to parse.  If `None` or empty, nothing is
        parsed: ``tree`` is `None` and ``keys`` and ``metadata`` are empty
        containers.

    Raises
    ------
    RuntimeError
        Raised if constructing the parser or parsing ``expression`` fails;
        the original exception is chained as the cause.
    """
    # `dataclass` does not replace an ``__init__`` that is defined in the
    # class body, so this hand-written constructor is the one that is used.
    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        if expression:
            # Imported at call time rather than at module scope; presumably
            # this avoids a circular import - TODO confirm.
            from .expressions import InspectionVisitor
            try:
                parser = ParserYacc()
                self.tree = parser.parse(expression)
            except Exception as exc:
                raise RuntimeError(f"Failed to parse user expression `{expression}'.") from exc
            # Walk the parsed tree once to collect the dimensions and
            # metadata fields it references.
            visitor = InspectionVisitor(universe)
            self.tree.visit(visitor)
            self.keys = visitor.keys
            self.metadata = visitor.metadata
        else:
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()

    tree: Optional[Node]
    """The parsed user expression tree, if present (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """All dimensions whose keys are referenced by the expression
    (`NamedValueSet` of `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, Set[str]]
    """All dimension elements metadata fields referenced by the expression
    (`NamedKeyDict` mapping `DimensionElement` to a `set` of field names).
    """


@dataclass
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result rows
        of the query.
    dataId : `ExpandedDataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance.  If
        not provided, will be set to an empty data ID.
    expression : `str` or `QueryWhereExpression`, optional
        A user-provided string WHERE expression.  An existing
        `QueryWhereExpression` is stored as-is; anything else (including
        `None`) is passed to the `QueryWhereExpression` constructor together
        with ``requested.universe``.
    """
    # `dataclass` does not replace an ``__init__`` that is defined in the
    # class body, so this hand-written constructor is the one that is used.
    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[ExpandedDataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None):
        self.requested = requested
        self.dataId = dataId if dataId is not None else ExpandedDataCoordinate(requested.universe.empty, ())
        self.expression = (expression if isinstance(expression, QueryWhereExpression)
                           else QueryWhereExpression(requested.universe, expression))

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    dataId: ExpandedDataCoordinate
    """A data ID identifying dimensions known before query construction
    (`ExpandedDataCoordinate`).
    """

    expression: QueryWhereExpression
    """Information about any parsed user WHERE expression
    (`QueryWhereExpression`).
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate spatially in the query if:
        #  - it's the most precise spatial element for its system among the
        #    dimensions whose keys are joined (i.e. it is in
        #    `self.mustHaveKeysJoined.spatial`, which covers the requested
        #    dimensions and those referenced by the WHERE expression);
        #  - it isn't also given at query construction time.
        result = self.mustHaveKeysJoined.spatial - self.dataId.graph.elements
        if len(result) == 1:
            # There's no spatial join, but there might be a WHERE filter based
            # on a given region.
            if self.dataId.graph.spatial:
                # We can only perform those filters against SkyPix dimensions,
                # so if what we have isn't one, add the common SkyPix dimension
                # to the query; the element we have will be joined to that.
                element, = result
                if not isinstance(element, SkyPixDimension):
                    result.add(self.universe.commonSkyPix)
            else:
                # There is no spatial join or filter in this query.  Even
                # if this element might be associated with spatial
                # information, we don't need it for this query.
                return NamedValueSet()
        elif len(result) > 1:
            # There's a spatial join.  Those require the common SkyPix
            # system to be included in the query in order to connect them.
            result.add(self.universe.commonSkyPix)
        return result

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the
        query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate temporally in the query if:
        #  - it's the most precise temporal element for its system among the
        #    dimensions whose keys are joined (i.e. it is in
        #    `self.mustHaveKeysJoined.temporal`);
        #  - it isn't also given at query construction time.
        result = self.mustHaveKeysJoined.temporal - self.dataId.graph.elements
        if len(result) == 1 and not self.dataId.graph.temporal:
            # No temporal join or filter.  Even if this element might be
            # associated with temporal information, we don't need it for this
            # query.
            return NamedValueSet()
        return result

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the JOIN ON clauses
        of the query, even if their tables do not appear (`DimensionGraph`).

        A `Dimension` primary key can appear in a join clause without its table
        via a foreign key column in table of a dependent dimension element or
        dataset.
        """
        # Union of the requested dimensions and those whose keys are
        # referenced by the WHERE expression.
        names = set(self.requested.names | self.expression.keys.names)
        return DimensionGraph(self.universe, names=names)

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose associated tables must appear in the
        query's FROM clause (`NamedValueSet` of `DimensionElement`).
        """
        # Start from every element that contributes a region, a timespan, or
        # a metadata field referenced by the WHERE expression.
        result = self.spatial | self.temporal | self.expression.metadata.keys()
        # Add each joined dimension whose ``implied`` attribute is truthy.
        for dimension in self.mustHaveKeysJoined:
            if dimension.implied:
                result.add(dimension)
        # Add each element flagged ``alwaysJoin``, also considering the
        # dimensions identified by the data ID.
        for element in self.mustHaveKeysJoined.union(self.dataId.graph).elements:
            if element.alwaysJoin:
                result.add(element)
        return result


@dataclass
class QueryColumns:
    """A struct organizing the columns in an under-construction or currently-
    executing query.

    Takes no parameters at construction, as expected usage is to add elements
    to its container attributes incrementally.
    """
    # `dataclass` does not replace an ``__init__`` that is defined in the
    # class body, so this no-argument constructor is the one that is used.
    def __init__(self):
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = NamedKeyDict()

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Columns that correspond to the primary key values of dimensions
    (`NamedKeyDict` mapping `Dimension` to a `list` of `ColumnElement`).

    Each value list contains columns from multiple tables corresponding to the
    same dimension, and the query should constrain the values of those columns
    to be the same.

    In a `Query`, the keys of this dictionary must include at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.dataId.graph`.
    """

    timespans: NamedKeyDict[DimensionElement, Timespan[ColumnElement]]
    """Columns that correspond to timespans for elements that participate in a
    temporal join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `Timespan` of `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Columns that correspond to regions for elements that participate in a
    spatial join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: NamedKeyDict[DatasetType, Tuple[ColumnElement, Optional[ColumnElement]]]
    """Columns that correspond to the ``dataset_id`` and optionally collection
    rank for a dataset in the query (`NamedKeyDict` mapping `DatasetType` to
    `tuple` of `ColumnElement`).

    "Collection rank" here is the index of the collection in which this dataset
    was found in the list of collections to search; a lower rank corresponds
    to a collection that appears earlier in the search path.
    """

    def getKeyColumn(self, dimension: Dimension) -> ColumnElement:
        """Return one of the columns in ``self.keys`` for the given dimension.

        The column selected is an implementation detail but is guaranteed to
        be deterministic and consistent across multiple calls.

        Parameters
        ----------
        dimension : `Dimension`
            Element for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.
        """
        # Choosing the last element here is entirely for human readers of the
        # query (e.g. developers debugging things); it makes it more likely a
        # dimension key will be provided by the dimension's own table, or
        # failing that, some closely related dimension, which might be less
        # surprising to see than e.g. some dataset subquery.  From the
        # database's perspective this is entirely arbitrary, because the query
        # guarantees they all have equal values.
        return self.keys[dimension][-1]