Coverage for python/lsst/daf/butler/registry/queries/_structs.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["QuerySummary", "RegistryManagers"] # other classes here are local to subpackage
25from dataclasses import dataclass
26from typing import Iterator, List, Optional, Union
28from sqlalchemy.sql import ColumnElement
30from lsst.sphgeom import Region
31from ...core import (
32 TimespanDatabaseRepresentation,
33 DataCoordinate,
34 DatasetType,
35 Dimension,
36 DimensionElement,
37 DimensionGraph,
38 DimensionUniverse,
39 NamedKeyDict,
40 NamedValueSet,
41 SkyPixDimension,
42)
43from ..interfaces import (
44 CollectionManager,
45 DatasetRecordStorageManager,
46 DimensionRecordStorageManager,
47)
48# We're not trying to add typing to the lex/yacc parser code, so MyPy
49# doesn't know about some of these imports.
50from .exprParser import Node, ParserYacc # type: ignore
@dataclass
class QueryWhereExpression:
    """A struct representing a parsed user-provided WHERE expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    expression : `str`, optional
        The string expression to parse.  If `None` or empty, nothing is
        parsed: ``tree`` is `None` and ``keys`` / ``metadata`` are empty.

    Raises
    ------
    RuntimeError
        Raised if ``expression`` cannot be parsed.  The exception raised by
        the parser is chained as the cause.
    """

    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        if not expression:
            # No user expression: populate every attribute with an empty
            # placeholder so callers never have to special-case this.
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()
            return
        # Deferred import, only needed when there is something to parse
        # (presumably this also avoids an import cycle -- TODO confirm).
        from .expressions import InspectionVisitor
        try:
            parser = ParserYacc()
            self.tree = parser.parse(expression)
        except Exception as exc:
            raise RuntimeError(f"Failed to parse user expression `{expression}'.") from exc
        visitor = InspectionVisitor(universe)
        # Narrows Optional[Node] for the type checker before the tree is
        # walked.
        assert self.tree is not None
        self.tree.visit(visitor)
        self.keys = visitor.keys
        self.metadata = visitor.metadata

    tree: Optional[Node]
    """The parsed user expression tree, if present (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """All dimensions whose keys are referenced by the expression
    (`NamedValueSet` of `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, List[str]]
    """All dimension elements metadata fields referenced by the expression
    (`NamedKeyDict` mapping `DimensionElement` to a `list` of field names).
    """
@dataclass
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result rows
        of the query.
    dataId : `DataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance.  If
        not provided, will be set to an empty data ID.  ``dataId.hasRecords()``
        must return `True`.
    expression : `str` or `QueryWhereExpression`, optional
        A user-provided string WHERE expression.  An existing
        `QueryWhereExpression` is used as-is; anything else is passed to the
        `QueryWhereExpression` constructor.
    whereRegion : `lsst.sphgeom.Region`, optional
        A spatial region that all rows must overlap.  If `None`, the
        ``region`` attribute of the (possibly defaulted) data ID is used.
    """

    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[DataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None,
                 whereRegion: Optional[Region] = None):
        self.requested = requested
        self.dataId = dataId if dataId is not None else DataCoordinate.makeEmpty(requested.universe)
        self.expression = (expression if isinstance(expression, QueryWhereExpression)
                           else QueryWhereExpression(requested.universe, expression))
        # ``self.dataId`` has just been given a non-`None` value, so only the
        # explicit argument needs to be checked before falling back to it.
        if whereRegion is None:
            whereRegion = self.dataId.region
        self.whereRegion = whereRegion

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    dataId: DataCoordinate
    """A data ID identifying dimensions known before query construction
    (`DataCoordinate`).

    ``dataId.hasRecords()`` is guaranteed to return `True`.
    """

    whereRegion: Optional[Region]
    """A spatial region that all result rows must overlap
    (`lsst.sphgeom.Region` or `None`).
    """

    expression: QueryWhereExpression
    """Information about any parsed user WHERE expression
    (`QueryWhereExpression`).
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate spatially in the query if:
        # - it's the element chosen by its spatial family from the dimensions
        #   that must have their keys joined (`mustHaveKeysJoined`);
        # - it isn't also given at query construction time.
        #
        # `mustHaveKeysJoined` builds a new graph on every access, so it is
        # evaluated once here rather than on every loop iteration.
        keysGraph = self.mustHaveKeysJoined
        result: NamedValueSet[DimensionElement] = NamedValueSet()
        for family in keysGraph.spatial:
            element = family.choose(keysGraph.elements)
            assert isinstance(element, DimensionElement)
            if element not in self.dataId.graph.elements:
                result.add(element)
        if len(result) == 1:
            # There's no spatial join, but there might be a WHERE filter based
            # on a given region.
            if self.dataId.graph.spatial:
                # We can only perform those filters against SkyPix dimensions,
                # so if what we have isn't one, add the common SkyPix dimension
                # to the query; the element we have will be joined to that.
                element, = result
                if not isinstance(element, SkyPixDimension):
                    result.add(self.universe.commonSkyPix)
            else:
                # There is no spatial join or filter in this query.  Even
                # if this element might be associated with spatial
                # information, we don't need it for this query.
                return NamedValueSet()
        elif len(result) > 1:
            # There's a spatial join.  Those require the common SkyPix
            # system to be included in the query in order to connect them.
            result.add(self.universe.commonSkyPix)
        return result

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the
        query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate temporally in the query if:
        # - it's the element chosen by its temporal family from the dimensions
        #   that must have their keys joined (`mustHaveKeysJoined`);
        # - it isn't also given at query construction time.
        #
        # As in `spatial`, evaluate the derived graph only once.
        keysGraph = self.mustHaveKeysJoined
        result: NamedValueSet[DimensionElement] = NamedValueSet()
        for family in keysGraph.temporal:
            element = family.choose(keysGraph.elements)
            assert isinstance(element, DimensionElement)
            if element not in self.dataId.graph.elements:
                result.add(element)
        if len(result) == 1 and not self.dataId.graph.temporal:
            # No temporal join or filter.  Even if this element might be
            # associated with temporal information, we don't need it for this
            # query.
            return NamedValueSet()
        return result

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the JOIN ON clauses
        of the query, even if their tables do not appear (`DimensionGraph`).

        A `Dimension` primary key can appear in a join clause without its table
        via a foreign key column in table of a dependent dimension element or
        dataset.
        """
        # Union of the requested dimensions and those whose keys are
        # referenced by the WHERE expression.
        names = set(self.requested.names | self.expression.keys.names)
        return DimensionGraph(self.universe, names=names)

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose associated tables must appear in the
        query's FROM clause (`NamedValueSet` of `DimensionElement`).
        """
        result = NamedValueSet(self.spatial | self.temporal | self.expression.metadata.keys())
        # Evaluated once; it is needed by both loops below.
        keysGraph = self.mustHaveKeysJoined
        for dimension in keysGraph:
            if dimension.implied:
                result.add(dimension)
        for element in keysGraph.union(self.dataId.graph).elements:
            if element.alwaysJoin:
                result.add(element)
        return result
@dataclass
class DatasetQueryColumns:
    """A struct containing the columns used to reconstruct `DatasetRef`
    instances from query results.
    """

    datasetType: DatasetType
    """The dataset type being queried (`DatasetType`).
    """

    id: ColumnElement
    """Column containing the unique integer ID for this dataset.
    """

    runKey: ColumnElement
    """Foreign key column to the `~CollectionType.RUN` collection that holds
    this dataset.
    """

    def __iter__(self) -> Iterator[ColumnElement]:
        """Iterate over the columns held by this struct.

        Yields
        ------
        column : `sqlalchemy.sql.ColumnElement`
            ``id`` first, then ``runKey``.  ``datasetType`` is not yielded.
        """
        yield self.id
        yield self.runKey
@dataclass
class QueryColumns:
    """A struct organizing the columns in an under-construction or currently-
    executing query.

    Takes no parameters at construction, as expected usage is to add elements
    to its container attributes incrementally.
    """

    def __init__(self) -> None:
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = None

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Columns that correspond to the primary key values of dimensions
    (`NamedKeyDict` mapping `Dimension` to a `list` of `ColumnElement`).

    Each value list contains columns from multiple tables corresponding to the
    same dimension, and the query should constrain the values of those columns
    to be the same.

    In a `Query`, the keys of this dictionary must include at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.dataId.graph`.
    """

    timespans: NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]
    """Columns that correspond to timespans for elements that participate in a
    temporal join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `TimespanDatabaseRepresentation`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Columns that correspond to regions for elements that participate in a
    spatial join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: Optional[DatasetQueryColumns]
    """Columns that can be used to construct `DatasetRef` instances from query
    results.
    (`DatasetQueryColumns` or `None`).
    """

    def isEmpty(self) -> bool:
        """Return `True` if this query has no columns at all.

        Returns
        -------
        empty : `bool`
            `True` only if ``keys``, ``timespans`` and ``regions`` are all
            empty and ``datasets`` is `None`.
        """
        return not (self.keys or self.timespans or self.regions or self.datasets is not None)

    def getKeyColumn(self, dimension: Union[Dimension, str]) -> ColumnElement:
        """Return one of the columns in self.keys for the given dimension.

        The column selected is an implementation detail but is guaranteed to
        be deterministic and consistent across multiple calls.

        Parameters
        ----------
        dimension : `Dimension` or `str`
            Dimension for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.

        Notes
        -----
        No error handling is done here: a failed lookup in ``self.keys``, or
        an empty column list for ``dimension``, propagates whatever exception
        the underlying containers raise.
        """
        # Choosing the last element here is entirely for human readers of the
        # query (e.g. developers debugging things); it makes it more likely a
        # dimension key will be provided by the dimension's own table, or
        # failing that, some closely related dimension, which might be less
        # surprising to see than e.g. some dataset subquery.  From the
        # database's perspective this is entirely arbitrary, because the query
        # guarantees they all have equal values.
        return self.keys[dimension][-1]
@dataclass
class RegistryManagers:
    """Struct used to pass around the manager objects that back a `Registry`
    and are used internally by the query system.
    """

    collections: CollectionManager
    """Manager for collections (`CollectionManager`).
    """

    datasets: DatasetRecordStorageManager
    """Manager for datasets and dataset types (`DatasetRecordStorageManager`).
    """

    dimensions: DimensionRecordStorageManager
    """Manager for dimensions (`DimensionRecordStorageManager`).
    """