Coverage report for python/lsst/daf/butler/registry/queries/_structs.py: 36% of 166 statements covered (coverage.py v6.5.0, created at 2023-02-23).
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["QuerySummary"]  # other classes here are local to subpackage

import dataclasses
from collections.abc import Iterable, Mapping, Set
from typing import Any

import astropy.time
from lsst.daf.relation import ColumnExpression, ColumnTag, Predicate, SortTerm
from lsst.sphgeom import IntersectionRegion, Region
from lsst.utils.classes import cached_getter, immutable

from ...core import (
    DataCoordinate,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecordColumnTag,
    DimensionUniverse,
    NamedValueAbstractSet,
    NamedValueSet,
    SkyPixDimension,
)

# We're not trying to add typing to the lex/yacc parser code, so MyPy
# doesn't know about some of these imports.
from .expressions import make_string_expression_predicate
from .expressions.categorize import categorizeElementOrderByName, categorizeOrderByName


@dataclasses.dataclass(frozen=True, eq=False)
class QueryWhereClause:
    """Structure holding various contributions to a query's WHERE clause.

    Instances of this class should only be created by
    `QueryWhereClause.combine`, which guarantees the consistency of its
    attributes.
    """

    @classmethod
    def combine(
        cls,
        dimensions: DimensionGraph,
        expression: str = "",
        *,
        bind: Mapping[str, Any] | None = None,
        data_id: DataCoordinate | None = None,
        region: Region | None = None,
        defaults: DataCoordinate | None = None,
        dataset_type_name: str | None = None,
        allow_orphans: bool = False,
    ) -> QueryWhereClause:
        """Construct from various components.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            The dimensions that would be included in the query in the absence
            of the WHERE clause.
        expression : `str`, optional
            A user-provided string expression.
        bind : `Mapping` [ `str`, `object` ], optional
            Mapping containing literal values that should be injected into the
            query expression, keyed by the identifiers they replace.
        data_id : `DataCoordinate`, optional
            A data ID identifying dimensions known in advance.  If not
            provided, will be set to an empty data ID.
        region : `lsst.sphgeom.Region`, optional
            A spatial constraint that all rows must overlap.
        defaults : `DataCoordinate`, optional
            A data ID containing default values for governor dimensions.
        dataset_type_name : `str` or `None`, optional
            The name of the dataset type to assume for unqualified dataset
            columns, or `None` if there are no such identifiers.
        allow_orphans : `bool`, optional
            If `True`, permit expressions to refer to dimensions without
            providing a value for their governor dimensions (e.g. referring to
            a visit without an instrument).  Should be left to default to
            `False` in essentially all new code.

        Returns
        -------
        where : `QueryWhereClause`
            An object representing the WHERE clause for a query.
        """
        if data_id is None:
            data_id = DataCoordinate.makeEmpty(dimensions.universe)
        if defaults is None:
            defaults = DataCoordinate.makeEmpty(dimensions.universe)
        expression_predicate, governor_constraints = make_string_expression_predicate(
            expression,
            dimensions,
            bind=bind,
            data_id=data_id,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=allow_orphans,
        )
        return QueryWhereClause(
            expression_predicate,
            data_id,
            region=region,
            governor_constraints=governor_constraints,
        )

    expression_predicate: Predicate | None
    """A predicate that evaluates a string expression from the user
    (`lsst.daf.relation.Predicate` or `None`).
    """

    data_id: DataCoordinate
    """A data ID identifying dimensions known before query construction
    (`DataCoordinate`).
    """

    region: Region | None
    """A spatial region that all result rows must overlap
    (`lsst.sphgeom.Region` or `None`).
    """

    governor_constraints: Mapping[str, Set[str]]
    """Restrictions on the values governor dimensions can take in this query,
    imposed by the string expression and/or data ID
    (`Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]).

    Governor dimensions not present in this mapping are not constrained at
    all.
    """
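

# Usage sketch (illustrative, not part of the module; the dimension and
# governor names below assume the standard LSST dimension universe, and
# ``universe`` is a caller-provided `DimensionUniverse`):
#
#     where = QueryWhereClause.combine(
#         universe.extract(["visit"]),
#         "instrument = 'HSC' AND visit > 100",
#     )
#     # where.governor_constraints would then include {"instrument": {"HSC"}},
#     # because the expression pins that governor dimension to a single value;
#     # governors absent from the mapping are unconstrained.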


@dataclasses.dataclass(frozen=True)
class OrderByClauseColumn:
    """Information about a single column in an ORDER BY clause."""

    element: DimensionElement
    """Dimension element for data in this column (`DimensionElement`)."""

    column: str | None
    """Name of the column, or `None` for the primary key (`str` or `None`)."""

    ordering: bool
    """`True` for ascending order, `False` for descending (`bool`)."""


@dataclasses.dataclass(frozen=True, eq=False)
class OrderByClause:
    """Class for information about columns in an ORDER BY clause."""

    @classmethod
    def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderByClause:
        """Parse an iterable of strings in the context of a multi-dimension
        query.

        Parameters
        ----------
        order_by : `Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        graph : `DimensionGraph`
            Dimensions used by a query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            element, column = categorizeOrderByName(graph, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)
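
    # A sketch of what parsing produces (illustrative; "visit" and
    # "exposure.day_obs" assume the standard LSST dimension universe):
    #
    #     clause = OrderByClause.parse_general(["visit", "-exposure.day_obs"], graph)
    #     # -> an ascending SortTerm on the "visit" dimension key, then a
    #     #    descending SortTerm on the "day_obs" record column of "exposure".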

    @classmethod
    def parse_element(cls, order_by: Iterable[str], element: DimensionElement) -> OrderByClause:
        """Parse an iterable of strings in the context of a single dimension
        element query.

        Parameters
        ----------
        order_by : `Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        element : `DimensionElement`
            Single or primary dimension element in the query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)

    @classmethod
    def _make_term(cls, element: DimensionElement, column: str | None, ascending: bool) -> SortTerm:
        """Make a single sort term from parsed user expression values.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the sort term references.
        column : `str` or `None`
            `DimensionRecord` field name, or `None` if ``element`` is a
            `Dimension` and the sort term is on its key value.
        ascending : `bool`
            Whether to sort ascending (`True`) or descending (`False`).

        Returns
        -------
        term : `lsst.daf.relation.SortTerm`
            Sort term struct.
        """
        tag: ColumnTag
        expression: ColumnExpression
        if column is None:
            tag = DimensionKeyColumnTag(element.name)
            expression = ColumnExpression.reference(tag)
        elif column in ("timespan.begin", "timespan.end"):
            base_column, _, subfield = column.partition(".")
            tag = DimensionRecordColumnTag(element.name, base_column)
            expression = ColumnExpression.reference(tag).method(
                "lower" if subfield == "begin" else "upper", dtype=astropy.time.Time
            )
        else:
            tag = DimensionRecordColumnTag(element.name, column)
            expression = ColumnExpression.reference(tag)
        return SortTerm(expression, ascending)
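
    # Note: "timespan.begin" and "timespan.end" are not stored columns; as
    # shown above they are mapped onto the "lower"/"upper" methods of the
    # element's timespan column expression, so e.g. a parsed
    # "-visit.timespan.begin" term sorts descending on each visit's start time
    # (the "visit" element name here is illustrative).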

    terms: Iterable[SortTerm]
    """Terms that appear in the ORDER BY
    (`Iterable` [ `lsst.daf.relation.SortTerm` ]).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        """Set of column tags for all columns referenced by the ORDER BY
        clause (`~collections.abc.Set` [ `ColumnTag` ]).
        """
        tags: set[ColumnTag] = set()
        for term in self.terms:
            tags.update(term.expression.columns_required)
        return tags


@immutable
class ElementOrderByClause:
    """Class for information about columns in an ORDER BY clause for one
    element.

    Parameters
    ----------
    order_by : `Iterable` [ `str` ]
        Sequence of names to use for ordering with optional "-" prefix.
    element : `DimensionElement`
        Single or primary dimension element in the query.
    """

    def __init__(self, order_by: Iterable[str], element: DimensionElement):
        self.order_by_columns = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            self.order_by_columns.append(
                OrderByClauseColumn(element=element, column=column, ordering=ascending)
            )

    order_by_columns: Iterable[OrderByClauseColumn]
    """Columns that appear in the ORDER BY
    (`Iterable` [ `OrderByClauseColumn` ]).
    """
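

# Usage sketch (illustrative column names; any record field accepted by
# categorizeElementOrderByName for the given element works):
#
#     clause = ElementOrderByClause(["day_obs", "-seq_num"], element)
#     # clause.order_by_columns -> two OrderByClauseColumn entries, the first
#     # ascending and the second descending; the leading "-" is stripped from
#     # the name and flips `ordering` to False.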


@immutable
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result
        rows of the query.
    data_id : `DataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance.  If
        not provided, will be set to an empty data ID.
    expression : `str`, optional
        A user-provided string WHERE expression.
    region : `lsst.sphgeom.Region`, optional
        A spatial constraint that all rows must overlap.
    bind : `Mapping` [ `str`, `object` ], optional
        Mapping containing literal values that should be injected into the
        query expression, keyed by the identifiers they replace.
    defaults : `DataCoordinate`, optional
        A data ID containing default values for governor dimensions.
    datasets : `Iterable` [ `DatasetType` ], optional
        Dataset types whose searches may be joined into the query.  Callers
        must still call `QueryBuilder.joinDataset` explicitly to control how
        that join happens (e.g. which collections are searched), but by
        declaring them here first we can ensure that the query includes the
        right dimensions for those joins.
    order_by : `Iterable` [ `str` ], optional
        Sequence of names to use for ordering with optional "-" prefix.
    limit : `tuple` [ `int`, `int` or `None` ], optional
        Limit on the number of returned rows and optional offset.
    check : `bool`, optional
        If `False`, permit expressions to refer to dimensions without
        providing a value for their governor dimensions (e.g. referring to a
        visit without an instrument).  Should be left to default to `True` in
        essentially all new code.
    """

    def __init__(
        self,
        requested: DimensionGraph,
        *,
        data_id: DataCoordinate | None = None,
        expression: str = "",
        region: Region | None = None,
        bind: Mapping[str, Any] | None = None,
        defaults: DataCoordinate | None = None,
        datasets: Iterable[DatasetType] = (),
        order_by: Iterable[str] | None = None,
        limit: tuple[int, int | None] | None = None,
        check: bool = True,
    ):
        self.requested = requested
        self.datasets = NamedValueSet(datasets).freeze()
        if len(self.datasets) == 1:
            (dataset_type_name,) = self.datasets.names
        else:
            dataset_type_name = None
        self.where = QueryWhereClause.combine(
            self.requested,
            expression=expression,
            bind=bind,
            data_id=data_id,
            region=region,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=not check,
        )
        self.order_by = None if order_by is None else OrderByClause.parse_general(order_by, requested)
        self.limit = limit
        self.columns_required, self.dimensions, self.region = self._compute_columns_required()
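
    # Construction sketch (illustrative; assumes the standard LSST dimension
    # universe and caller-provided ``universe`` and ``defaults`` objects):
    #
    #     summary = QuerySummary(
    #         requested=universe.extract(["detector", "visit"]),
    #         expression="instrument = 'HSC' AND visit > 100",
    #         defaults=defaults,
    #         order_by=["-visit"],
    #         limit=(1000, None),
    #     )
    #     # summary.dimensions includes any dimensions implied by the
    #     # expression, and summary.columns_required is every column the
    #     # joined relations must provide.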

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    where: QueryWhereClause
    """Structure containing objects that contribute to the WHERE clause of the
    query (`QueryWhereClause`).
    """

    datasets: NamedValueAbstractSet[DatasetType]
    """Dataset types whose searches may be joined into the query
    (`NamedValueAbstractSet` [ `DatasetType` ]).
    """

    order_by: OrderByClause | None
    """Object that manages how the query results should be sorted
    (`OrderByClause` or `None`).
    """

    limit: tuple[int, int | None] | None
    """Maximum number of rows to return and an optional integer offset,
    respectively, applied prior to postprocessing filters
    (`tuple` [ `int`, `int` or `None` ] or `None`).
    """

    dimensions: DimensionGraph
    """All dimensions in the query in any form (`DimensionGraph`).
    """

    region: Region | None
    """Region that bounds all query results (`lsst.sphgeom.Region` or
    `None`).

    While `QueryWhereClause.region` and the ``region`` constructor argument
    represent an external region given directly by the caller, this represents
    the region actually used directly as a constraint on the query results,
    which can also come from the data ID passed by the caller.
    """

    columns_required: Set[ColumnTag]
    """All columns that must be included directly in the query
    (`~collections.abc.Set` [ `ColumnTag` ]).

    This does not include columns that only need to be included in the result
    rows, and hence could be provided by postprocessors.
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`)."""
        return self.requested.universe

    def _compute_columns_required(
        self,
    ) -> tuple[set[ColumnTag], DimensionGraph, Region | None]:
        """Compute the columns that must be provided by the relations joined
        into this query in order to obtain the right *set* of result rows in
        the right order.

        This does not include columns that only need to be included in the
        result rows, and hence could be provided by postprocessors.
        """
        tags: set[ColumnTag] = set(DimensionKeyColumnTag.generate(self.requested.names))
        for dataset_type in self.datasets:
            tags.update(DimensionKeyColumnTag.generate(dataset_type.dimensions.names))
        if self.where.expression_predicate is not None:
            tags.update(self.where.expression_predicate.columns_required)
        if self.order_by is not None:
            tags.update(self.order_by.columns_required)
        region = self.where.region
        for dimension in self.where.data_id.graph:
            dimension_tag = DimensionKeyColumnTag(dimension.name)
            if dimension_tag in tags:
                continue
            if dimension == self.universe.commonSkyPix or not isinstance(dimension, SkyPixDimension):
                # If a dimension in the data ID is available from dimension
                # tables or dimension spatial-join tables in the database,
                # include it in the set of dimensions whose tables should be
                # joined.  This makes these data ID constraints work just like
                # simple 'where' constraints, which is good.
                tags.add(dimension_tag)
            else:
                # This is a SkyPixDimension other than the common one.  If
                # it's not already present in the query (e.g. from a dataset
                # join), this is a pure spatial constraint, which we can only
                # apply by modifying the 'region' for the query.  That will
                # also require that we join in the common skypix dimension.
                pixel = dimension.pixelization.pixel(self.where.data_id[dimension])
                if region is None:
                    region = pixel
                else:
                    region = IntersectionRegion(region, pixel)
        # Make sure the dimension keys are expanded self-consistently in what
        # we return by passing them through DimensionGraph.
        dimensions = DimensionGraph(
            self.universe, names={tag.dimension for tag in DimensionKeyColumnTag.filter_from(tags)}
        )
        # If we have a region constraint, ensure region columns and the common
        # skypix dimension are included.
        missing_common_skypix = False
        if region is not None:
            for family in dimensions.spatial:
                element = family.choose(dimensions.elements)
                tags.add(DimensionRecordColumnTag(element.name, "region"))
                if not isinstance(element, SkyPixDimension) and self.universe.commonSkyPix not in dimensions:
                    missing_common_skypix = True
        if missing_common_skypix:
            dimensions = dimensions.union(self.universe.commonSkyPix.graph)
            tags.update(DimensionKeyColumnTag.generate(dimensions.names))
        return (tags, dimensions, region)
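

# Spatial-constraint sketch: when the caller's data ID pins a skypix dimension
# other than the common one (e.g. a hypothetical "healpix10" dimension, if one
# is defined in the universe), _compute_columns_required cannot join that
# dimension's table, so it instead tightens the query region:
#
#     pixel = dimension.pixelization.pixel(self.where.data_id[dimension])
#     region = pixel if region is None else IntersectionRegion(region, pixel)
#
# and then joins in the common skypix dimension so the combined region can be
# evaluated against database rows.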