Coverage for python/lsst/daf/butler/registry/queries/_structs.py: 36%
166 statements
coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["QuerySummary"]  # other classes here are local to subpackage

import dataclasses
from collections.abc import Iterable, Mapping, Set
from typing import Any

import astropy.time
from lsst.daf.relation import ColumnExpression, ColumnTag, Predicate, SortTerm
from lsst.sphgeom import IntersectionRegion, Region
from lsst.utils.classes import cached_getter, immutable

from ...core import (
    ColumnTypeInfo,
    DataCoordinate,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecordColumnTag,
    DimensionUniverse,
    NamedValueAbstractSet,
    NamedValueSet,
    SkyPixDimension,
)

# We're not trying to add typing to the lex/yacc parser code, so MyPy
# doesn't know about some of these imports.
from .expressions import make_string_expression_predicate
from .expressions.categorize import categorizeElementOrderByName, categorizeOrderByName


@dataclasses.dataclass(frozen=True, eq=False)
class QueryWhereClause:
    """Structure holding various contributions to a query's WHERE clause.

    Instances of this class should only be created by
    `QueryWhereClause.combine`, which guarantees the consistency of its
    attributes.
    """

    @classmethod
    def combine(
        cls,
        dimensions: DimensionGraph,
        expression: str = "",
        *,
        column_types: ColumnTypeInfo,
        bind: Mapping[str, Any] | None = None,
        data_id: DataCoordinate | None = None,
        region: Region | None = None,
        defaults: DataCoordinate | None = None,
        dataset_type_name: str | None = None,
        allow_orphans: bool = False,
    ) -> QueryWhereClause:
        """Construct from various components.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            The dimensions that would be included in the query in the absence
            of the WHERE clause.
        expression : `str`, optional
            A user-provided string expression.
        column_types : `ColumnTypeInfo`
            Information about column types.
        bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
            Mapping containing literal values that should be injected into the
            query expression, keyed by the identifiers they replace.
        data_id : `DataCoordinate`, optional
            A data ID identifying dimensions known in advance. If not
            provided, will be set to an empty data ID.
        region : `lsst.sphgeom.Region`, optional
            A spatial constraint that all rows must overlap.
        defaults : `DataCoordinate`, optional
            A data ID containing defaults for governor dimensions.
        dataset_type_name : `str` or `None`, optional
            The name of the dataset type to assume for unqualified dataset
            columns, or `None` if there are no such identifiers.
        allow_orphans : `bool`, optional
            If `True`, permit expressions to refer to dimensions without
            providing a value for their governor dimensions (e.g. referring to
            a visit without an instrument). Should be left to default to
            `False` in essentially all new code.

        Returns
        -------
        where : `QueryWhereClause`
            An object representing the WHERE clause for a query.
        """
        if data_id is None:
            data_id = DataCoordinate.makeEmpty(dimensions.universe)
        if defaults is None:
            defaults = DataCoordinate.makeEmpty(dimensions.universe)
        expression_predicate, governor_constraints = make_string_expression_predicate(
            expression,
            dimensions,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=allow_orphans,
        )
        return QueryWhereClause(
            expression_predicate,
            data_id,
            region=region,
            governor_constraints=governor_constraints,
        )
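
    # Illustrative usage sketch (not part of the original source), assuming a
    # ``universe`` and ``column_types`` obtained elsewhere from a registry:
    #
    #     where = QueryWhereClause.combine(
    #         universe["visit"].graph,
    #         expression="instrument = 'HSC' AND visit > 100",
    #         column_types=column_types,
    #     )
    #
    # The returned struct carries the parsed predicate, an empty data ID, and
    # the governor constraints extracted from the expression.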

    expression_predicate: Predicate | None
    """A predicate that evaluates a string expression from the user
    (`lsst.daf.relation.Predicate` or `None`).
    """

    data_id: DataCoordinate
    """A data ID identifying dimensions known before query construction
    (`DataCoordinate`).
    """

    region: Region | None
    """A spatial region that all result rows must overlap
    (`lsst.sphgeom.Region` or `None`).
    """

    governor_constraints: Mapping[str, Set[str]]
    """Restrictions on the values governor dimensions can take in this query,
    imposed by the string expression and/or data ID
    (`~collections.abc.Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]).

    Governor dimensions not present in this mapping are not constrained at
    all.
    """


@dataclasses.dataclass(frozen=True)
class OrderByClauseColumn:
    """Information about a single column in an ORDER BY clause."""

    element: DimensionElement
    """Dimension element for data in this column (`DimensionElement`)."""

    column: str | None
    """Name of the column, or `None` for the primary key (`str` or `None`)."""

    ordering: bool
    """`True` for ascending order, `False` for descending (`bool`)."""


@dataclasses.dataclass(frozen=True, eq=False)
class OrderByClause:
    """Class for information about columns in an ORDER BY clause."""

    @classmethod
    def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderByClause:
        """Parse an iterable of strings in the context of a multi-dimension
        query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        graph : `DimensionGraph`
            Dimensions used by a query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            element, column = categorizeOrderByName(graph, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)
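
    # Illustrative sketch (not part of the original source): a leading "-"
    # requests descending order, so this would sort ascending by day_obs and
    # then descending by visit ID, given a graph containing those dimensions:
    #
    #     clause = OrderByClause.parse_general(["day_obs", "-visit"], graph)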

    @classmethod
    def parse_element(cls, order_by: Iterable[str], element: DimensionElement) -> OrderByClause:
        """Parse an iterable of strings in the context of a single dimension
        element query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        element : `DimensionElement`
            Single or primary dimension element in the query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)
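
    # Illustrative sketch (not part of the original source): ordering the
    # records of a single element by one of its record fields, e.g. a
    # detector's full_name:
    #
    #     clause = OrderByClause.parse_element(["full_name"], universe["detector"])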

    @classmethod
    def _make_term(cls, element: DimensionElement, column: str | None, ascending: bool) -> SortTerm:
        """Make a single sort term from parsed user expression values.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the sort term references.
        column : `str` or `None`
            DimensionRecord field name, or `None` if ``element`` is a
            `Dimension` and the sort term is on its key value.
        ascending : `bool`
            Whether to sort ascending (`True`) or descending (`False`).

        Returns
        -------
        term : `lsst.daf.relation.SortTerm`
            Sort term struct.
        """
        tag: ColumnTag
        expression: ColumnExpression
        if column is None:
            tag = DimensionKeyColumnTag(element.name)
            expression = ColumnExpression.reference(tag)
        elif column in ("timespan.begin", "timespan.end"):
            base_column, _, subfield = column.partition(".")
            tag = DimensionRecordColumnTag(element.name, base_column)
            expression = ColumnExpression.reference(tag).method(
                "lower" if subfield == "begin" else "upper", dtype=astropy.time.Time
            )
        else:
            tag = DimensionRecordColumnTag(element.name, column)
            expression = ColumnExpression.reference(tag)
        return SortTerm(expression, ascending)
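
    # Illustrative sketch (not part of the original source): for column
    # "timespan.begin" on the visit element, the resulting term sorts on the
    # lower bound of the visit timespan, i.e. roughly:
    #
    #     tag = DimensionRecordColumnTag("visit", "timespan")
    #     expression = ColumnExpression.reference(tag).method("lower", dtype=astropy.time.Time)
    #     term = SortTerm(expression, True)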

    terms: Iterable[SortTerm]
    """Terms that appear in the ORDER BY
    (`~collections.abc.Iterable` [ `lsst.daf.relation.SortTerm` ]).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        """Set of column tags for all columns referenced by the ORDER BY
        clause (`~collections.abc.Set` [ `ColumnTag` ]).
        """
        tags: set[ColumnTag] = set()
        for term in self.terms:
            tags.update(term.expression.columns_required)
        return tags


@immutable
class ElementOrderByClause:
    """Class for information about columns in an ORDER BY clause for one
    element.

    Parameters
    ----------
    order_by : `~collections.abc.Iterable` [ `str` ]
        Sequence of names to use for ordering with optional "-" prefix.
    element : `DimensionElement`
        Single or primary dimension element in the query.
    """

    def __init__(self, order_by: Iterable[str], element: DimensionElement):
        self.order_by_columns = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            self.order_by_columns.append(
                OrderByClauseColumn(element=element, column=column, ordering=ascending)
            )

    order_by_columns: Iterable[OrderByClauseColumn]
    """Columns that appear in the ORDER BY
    (`~collections.abc.Iterable` [ `OrderByClauseColumn` ]).
    """


@immutable
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result
        rows of the query.
    column_types : `ColumnTypeInfo`
        Information about column types.
    data_id : `DataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance. If
        not provided, will be set to an empty data ID.
    expression : `str`, optional
        A user-provided string WHERE expression.
    region : `lsst.sphgeom.Region`, optional
        A spatial constraint that all rows must overlap.
    bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
        Mapping containing literal values that should be injected into the
        query expression, keyed by the identifiers they replace.
    defaults : `DataCoordinate`, optional
        A data ID containing defaults for governor dimensions.
    datasets : `~collections.abc.Iterable` [ `DatasetType` ], optional
        Dataset types whose searches may be joined into the query. Callers
        must still call `QueryBuilder.joinDataset` explicitly to control how
        that join happens (e.g. which collections are searched), but by
        declaring them here first we can ensure that the query includes the
        right dimensions for those joins.
    order_by : `~collections.abc.Iterable` [ `str` ], optional
        Sequence of names to use for ordering with optional "-" prefix.
    limit : `tuple` [ `int`, `int` or `None` ], optional
        Limit on the number of returned rows and optional offset.
    check : `bool`, optional
        If `False`, permit expressions to refer to dimensions without
        providing a value for their governor dimensions (e.g. referring to a
        visit without an instrument). Should be left to default to `True` in
        essentially all new code.
    """

    def __init__(
        self,
        requested: DimensionGraph,
        *,
        column_types: ColumnTypeInfo,
        data_id: DataCoordinate | None = None,
        expression: str = "",
        region: Region | None = None,
        bind: Mapping[str, Any] | None = None,
        defaults: DataCoordinate | None = None,
        datasets: Iterable[DatasetType] = (),
        order_by: Iterable[str] | None = None,
        limit: tuple[int, int | None] | None = None,
        check: bool = True,
    ):
        self.requested = requested
        self.datasets = NamedValueSet(datasets).freeze()
        if len(self.datasets) == 1:
            # With exactly one dataset type, unqualified dataset columns in
            # the expression can be assumed to refer to it.
            (dataset_type_name,) = self.datasets.names
        else:
            dataset_type_name = None
        self.where = QueryWhereClause.combine(
            self.requested,
            expression=expression,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            region=region,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=not check,
        )
        self.order_by = None if order_by is None else OrderByClause.parse_general(order_by, requested)
        self.limit = limit
        self.columns_required, self.dimensions, self.region = self._compute_columns_required()
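
    # Illustrative usage sketch (not part of the original source), assuming
    # ``universe`` and ``column_types`` obtained elsewhere from a registry:
    #
    #     summary = QuerySummary(
    #         universe["visit"].graph,
    #         column_types=column_types,
    #         expression="instrument = 'HSC'",
    #         order_by=["-visit"],
    #         limit=(10, None),
    #     )
    #
    # ``summary.dimensions`` then includes everything the WHERE clause and
    # ORDER BY terms require, not just the requested visit dimensions.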

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    where: QueryWhereClause
    """Structure containing objects that contribute to the WHERE clause of the
    query (`QueryWhereClause`).
    """

    datasets: NamedValueAbstractSet[DatasetType]
    """Dataset types whose searches may be joined into the query
    (`NamedValueAbstractSet` [ `DatasetType` ]).
    """

    order_by: OrderByClause | None
    """Object that manages how the query results should be sorted
    (`OrderByClause` or `None`).
    """

    limit: tuple[int, int | None] | None
    """Maximum number of rows returned (prior to postprocessing filters) and
    optional integer offset, respectively (`tuple` [ `int`, `int` or `None` ]
    or `None`).
    """

    dimensions: DimensionGraph
    """All dimensions in the query in any form (`DimensionGraph`).
    """

    region: Region | None
    """Region that bounds all query results (`lsst.sphgeom.Region` or `None`).

    While `QueryWhereClause.region` and the ``region`` constructor argument
    represent an external region given directly by the caller, this represents
    the region actually used directly as a constraint on the query results,
    which can also come from the data ID passed by the caller.
    """

    columns_required: Set[ColumnTag]
    """All columns that must be included directly in the query.

    This does not include columns that only need to be included in the result
    rows, and hence could be provided by postprocessors.
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`)."""
        return self.requested.universe

    def _compute_columns_required(
        self,
    ) -> tuple[set[ColumnTag], DimensionGraph, Region | None]:
        """Compute the columns that must be provided by the relations joined
        into this query in order to obtain the right *set* of result rows in
        the right order.

        This does not include columns that only need to be included in the
        result rows, and hence could be provided by postprocessors.
        """
        tags: set[ColumnTag] = set(DimensionKeyColumnTag.generate(self.requested.names))
        for dataset_type in self.datasets:
            tags.update(DimensionKeyColumnTag.generate(dataset_type.dimensions.names))
        if self.where.expression_predicate is not None:
            tags.update(self.where.expression_predicate.columns_required)
        if self.order_by is not None:
            tags.update(self.order_by.columns_required)
        region = self.where.region
        for dimension in self.where.data_id.graph:
            dimension_tag = DimensionKeyColumnTag(dimension.name)
            if dimension_tag in tags:
                continue
            if dimension == self.universe.commonSkyPix or not isinstance(dimension, SkyPixDimension):
                # If a dimension in the data ID is available from dimension
                # tables or dimension spatial-join tables in the database,
                # include it in the set of dimensions whose tables should be
                # joined. This makes these data ID constraints work just like
                # simple 'where' constraints, which is good.
                tags.add(dimension_tag)
            else:
                # This is a SkyPixDimension other than the common one. If it's
                # not already present in the query (e.g. from a dataset join),
                # this is a pure spatial constraint, which we can only apply by
                # modifying the 'region' for the query. That will also require
                # that we join in the common skypix dimension.
                pixel = dimension.pixelization.pixel(self.where.data_id[dimension])
                if region is None:
                    region = pixel
                else:
                    region = IntersectionRegion(region, pixel)
        # Make sure the dimension keys are expanded self-consistently in what
        # we return by passing them through DimensionGraph.
        dimensions = DimensionGraph(
            self.universe, names={tag.dimension for tag in DimensionKeyColumnTag.filter_from(tags)}
        )
        # If we have a region constraint, ensure region columns and the common
        # skypix dimension are included.
        missing_common_skypix = False
        if region is not None:
            for family in dimensions.spatial:
                element = family.choose(dimensions.elements)
                tags.add(DimensionRecordColumnTag(element.name, "region"))
                if not isinstance(element, SkyPixDimension) and self.universe.commonSkyPix not in dimensions:
                    missing_common_skypix = True
        if missing_common_skypix:
            dimensions = dimensions.union(self.universe.commonSkyPix.graph)
            tags.update(DimensionKeyColumnTag.generate(dimensions.names))
        return (tags, dimensions, region)
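
# Illustrative sketch (not part of the original source): the skypix branch in
# _compute_columns_required turns a data ID value for a non-common skypix
# dimension (e.g. a hypothetical htm11 index) into a spatial constraint in
# essentially this way:
#
#     pixel = universe["htm11"].pixelization.pixel(12345)
#     region = pixel if region is None else IntersectionRegion(region, pixel)
#
# The common skypix dimension is then joined into the query so the resulting
# region constraint can be evaluated.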