Coverage for python/lsst/daf/butler/registry/queries/_structs.py: 40%
172 statements
coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["QuerySummary"]  # other classes here are local to subpackage

import dataclasses
from collections.abc import Iterable, Mapping, Set
from typing import Any

import astropy.time
from lsst.daf.relation import ColumnExpression, ColumnTag, Predicate, SortTerm
from lsst.sphgeom import IntersectionRegion, Region
from lsst.utils.classes import cached_getter, immutable

from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
from ..._column_type_info import ColumnTypeInfo
from ..._dataset_type import DatasetType
from ..._named import NamedValueAbstractSet, NamedValueSet
from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionUniverse, SkyPixDimension

# We're not trying to add typing to the lex/yacc parser code, so MyPy
# doesn't know about some of these imports.
from .expressions import make_string_expression_predicate
from .expressions.categorize import categorizeElementOrderByName, categorizeOrderByName


@dataclasses.dataclass(frozen=True, eq=False)
class QueryWhereClause:
    """Structure holding various contributions to a query's WHERE clause.

    Instances of this class should only be created by
    `QueryWhereClause.combine`, which guarantees the consistency of its
    attributes.
    """

    @classmethod
    def combine(
        cls,
        dimensions: DimensionGroup,
        expression: str = "",
        *,
        column_types: ColumnTypeInfo,
        bind: Mapping[str, Any] | None = None,
        data_id: DataCoordinate | None = None,
        region: Region | None = None,
        defaults: DataCoordinate | None = None,
        dataset_type_name: str | None = None,
        allow_orphans: bool = False,
    ) -> QueryWhereClause:
        """Construct from various components.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            The dimensions that would be included in the query in the absence
            of the WHERE clause.
        expression : `str`, optional
            A user-provided string expression.
        column_types : `ColumnTypeInfo`
            Information about column types.
        bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
            Mapping containing literal values that should be injected into the
            query expression, keyed by the identifiers they replace.
        data_id : `DataCoordinate`, optional
            A data ID identifying dimensions known in advance. If not
            provided, will be set to an empty data ID.
        region : `lsst.sphgeom.Region`, optional
            A spatial constraint that all rows must overlap.
        defaults : `DataCoordinate`, optional
            A data ID containing defaults for governor dimensions.
        dataset_type_name : `str` or `None`, optional
            The name of the dataset type to assume for unqualified dataset
            columns, or `None` if there are no such identifiers.
        allow_orphans : `bool`, optional
            If `True`, permit expressions to refer to dimensions without
            providing a value for their governor dimensions (e.g. referring to
            a visit without an instrument). Should be left to default to
            `False` in essentially all new code.

        Returns
        -------
        where : `QueryWhereClause`
            An object representing the WHERE clause for a query.
        """
        if data_id is None:
            data_id = DataCoordinate.make_empty(dimensions.universe)
        if defaults is None:
            defaults = DataCoordinate.make_empty(dimensions.universe)
        expression_predicate, governor_constraints = make_string_expression_predicate(
            expression,
            dimensions,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=allow_orphans,
        )
        return QueryWhereClause(
            expression_predicate,
            data_id,
            region=region,
            governor_constraints=governor_constraints,
        )
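
    # Illustrative sketch, not part of the original source: ``combine`` is the
    # intended factory for this class.  Assuming a hypothetical configured
    # ``registry`` (for its dimension universe and defaults) and a
    # ``column_types`` `ColumnTypeInfo` instance, usage would look roughly
    # like:
    #
    #     where = QueryWhereClause.combine(
    #         registry.dimensions.conform(["instrument", "visit"]),
    #         expression="visit > 100",
    #         column_types=column_types,
    #         defaults=registry.defaults.dataId,
    #     )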

    expression_predicate: Predicate | None
    """A predicate that evaluates a string expression from the user
    (`expressions.Predicate` or `None`).
    """

    data_id: DataCoordinate
    """A data ID identifying dimensions known before query construction
    (`DataCoordinate`).
    """

    region: Region | None
    """A spatial region that all result rows must overlap
    (`lsst.sphgeom.Region` or `None`).
    """

    governor_constraints: Mapping[str, Set[str]]
    """Restrictions on the values governor dimensions can take in this query,
    imposed by the string expression and/or data ID
    (`~collections.abc.Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]).

    Governor dimensions not present in this mapping are not constrained at
    all.
    """


@dataclasses.dataclass(frozen=True)
class OrderByClauseColumn:
    """Information about a single column in an ORDER BY clause."""

    element: DimensionElement
    """Dimension element for data in this column (`DimensionElement`)."""

    column: str | None
    """Name of the column, or `None` for the primary key (`str` or `None`)."""

    ordering: bool
    """`True` for ascending order, `False` for descending (`bool`)."""


@dataclasses.dataclass(frozen=True, eq=False)
class OrderByClause:
    """Class for information about columns in an ORDER BY clause."""

    @classmethod
    def parse_general(cls, order_by: Iterable[str], dimensions: DimensionGroup) -> OrderByClause:
        """Parse an iterable of strings in the context of a multi-dimension
        query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        dimensions : `DimensionGroup`
            Dimensions used by a query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            element, column = categorizeOrderByName(dimensions, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)
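
    # Illustrative sketch, not part of the original source: a leading "-"
    # selects descending order.  With a hypothetical ``dimensions`` group
    # containing visit and detector,
    #
    #     clause = OrderByClause.parse_general(["-visit", "detector"], dimensions)
    #
    # would sort descending on the visit key, then ascending on the detector
    # key.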

    @classmethod
    def parse_element(cls, order_by: Iterable[str], element: DimensionElement) -> OrderByClause:
        """Parse an iterable of strings in the context of a single dimension
        element query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        element : `DimensionElement`
            Single or primary dimension element in the query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)

    @classmethod
    def _make_term(cls, element: DimensionElement, column: str | None, ascending: bool) -> SortTerm:
        """Make a single sort term from parsed user expression values.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the sort term references.
        column : `str` or `None`
            `DimensionRecord` field name, or `None` if ``element`` is a
            `Dimension` and the sort term is on its key value.
        ascending : `bool`
            Whether to sort ascending (`True`) or descending (`False`).

        Returns
        -------
        term : `lsst.daf.relation.SortTerm`
            Sort term struct.
        """
        tag: ColumnTag
        expression: ColumnExpression
        if column is None:
            tag = DimensionKeyColumnTag(element.name)
            expression = ColumnExpression.reference(tag)
        elif column in ("timespan.begin", "timespan.end"):
            base_column, _, subfield = column.partition(".")
            tag = DimensionRecordColumnTag(element.name, base_column)
            expression = ColumnExpression.reference(tag).method(
                "lower" if subfield == "begin" else "upper", dtype=astropy.time.Time
            )
        else:
            tag = DimensionRecordColumnTag(element.name, column)
            expression = ColumnExpression.reference(tag)
        return SortTerm(expression, ascending)
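
    # Illustrative sketch, not part of the original source: for a name like
    # "timespan.begin" the sort expression is the lower bound of the record's
    # timespan column, built roughly as
    #
    #     tag = DimensionRecordColumnTag("visit", "timespan")
    #     expression = ColumnExpression.reference(tag).method("lower", dtype=astropy.time.Time)
    #     term = SortTerm(expression, True)
    #
    # where "visit" stands in for any dimension element with a timespan.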

    terms: Iterable[SortTerm]
    """Terms that appear in the ORDER BY
    (`~collections.abc.Iterable` [ `lsst.daf.relation.SortTerm` ]).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        """Set of column tags for all columns referenced by the ORDER BY
        clause (`~collections.abc.Set` [ `ColumnTag` ]).
        """
        tags: set[ColumnTag] = set()
        for term in self.terms:
            tags.update(term.expression.columns_required)
        return tags


@immutable
class ElementOrderByClause:
    """Class for information about columns in an ORDER BY clause for one
    element.

    Parameters
    ----------
    order_by : `~collections.abc.Iterable` [ `str` ]
        Sequence of names to use for ordering with optional "-" prefix.
    element : `DimensionElement`
        Single dimension element used by the query.
    """

    def __init__(self, order_by: Iterable[str], element: DimensionElement):
        self.order_by_columns = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            self.order_by_columns.append(
                OrderByClauseColumn(element=element, column=column, ordering=ascending)
            )

    order_by_columns: Iterable[OrderByClauseColumn]
    """Columns that appear in the ORDER BY
    (`~collections.abc.Iterable` [ `OrderByClauseColumn` ]).
    """


@immutable
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGroup`
        The dimensions whose primary keys should be included in the result
        rows of the query.
    column_types : `ColumnTypeInfo`
        Information about column types.
    data_id : `DataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance. If
        not provided, will be set to an empty data ID.
    expression : `str`, optional
        A user-provided string WHERE expression.
    region : `lsst.sphgeom.Region`, optional
        A spatial constraint that all rows must overlap.
    bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
        Mapping containing literal values that should be injected into the
        query expression, keyed by the identifiers they replace.
    defaults : `DataCoordinate`, optional
        A data ID containing defaults for governor dimensions.
    datasets : `~collections.abc.Iterable` [ `DatasetType` ], optional
        Dataset types whose searches may be joined into the query. Callers
        must still call `QueryBuilder.joinDataset` explicitly to control how
        that join happens (e.g. which collections are searched), but by
        declaring them here first we can ensure that the query includes the
        right dimensions for those joins.
    order_by : `~collections.abc.Iterable` [ `str` ], optional
        Sequence of names to use for ordering with optional "-" prefix.
    limit : `tuple` [ `int`, `int` or `None` ], optional
        Limit on the number of returned rows and optional offset.
    check : `bool`, optional
        If `False`, permit expressions to refer to dimensions without
        providing a value for their governor dimensions (e.g. referring to a
        visit without an instrument). Should be left to default to `True` in
        essentially all new code.
    """

    def __init__(
        self,
        requested: DimensionGroup,
        *,
        column_types: ColumnTypeInfo,
        data_id: DataCoordinate | None = None,
        expression: str = "",
        region: Region | None = None,
        bind: Mapping[str, Any] | None = None,
        defaults: DataCoordinate | None = None,
        datasets: Iterable[DatasetType] = (),
        order_by: Iterable[str] | None = None,
        limit: tuple[int, int | None] | None = None,
        check: bool = True,
    ):
        self.requested = requested
        self.datasets = NamedValueSet(datasets).freeze()
        if len(self.datasets) == 1:
            (dataset_type_name,) = self.datasets.names
        else:
            dataset_type_name = None
        self.where = QueryWhereClause.combine(
            self.requested,
            expression=expression,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            region=region,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=not check,
        )
        self.order_by = None if order_by is None else OrderByClause.parse_general(order_by, requested)
        self.limit = limit
        self.columns_required, self.dimensions, self.region = self._compute_columns_required()
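
    # Illustrative sketch, not part of the original source: a summary for a
    # visit-detector query, assuming a hypothetical dimension ``universe`` and
    # a ``column_types`` `ColumnTypeInfo` instance:
    #
    #     summary = QuerySummary(
    #         universe.conform(["visit", "detector"]),
    #         column_types=column_types,
    #         expression="instrument = 'HSC' AND visit > 100",
    #         order_by=["-visit"],
    #     )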

    requested: DimensionGroup
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGroup`).
    """

    where: QueryWhereClause
    """Structure containing objects that contribute to the WHERE clause of the
    query (`QueryWhereClause`).
    """

    datasets: NamedValueAbstractSet[DatasetType]
    """Dataset types whose searches may be joined into the query
    (`NamedValueAbstractSet` [ `DatasetType` ]).
    """

    order_by: OrderByClause | None
    """Object that manages how the query results should be sorted
    (`OrderByClause` or `None`).
    """

    limit: tuple[int, int | None] | None
    """Maximum number of rows returned (prior to postprocessing filters) and
    optional integer offset, respectively.
    """

    dimensions: DimensionGroup
    """All dimensions in the query in any form (`DimensionGroup`).
    """

    region: Region | None
    """Region that bounds all query results (`lsst.sphgeom.Region` or `None`).

    While `QueryWhereClause.region` and the ``region`` constructor argument
    represent an external region given directly by the caller, this represents
    the region actually used directly as a constraint on the query results,
    which can also come from the data ID passed by the caller.
    """

    columns_required: Set[ColumnTag]
    """All columns that must be included directly in the query.

    This does not include columns that only need to be included in the result
    rows, and hence could be provided by postprocessors.
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`)."""
        return self.requested.universe

    def _compute_columns_required(
        self,
    ) -> tuple[set[ColumnTag], DimensionGroup, Region | None]:
        """Compute the columns that must be provided by the relations joined
        into this query in order to obtain the right *set* of result rows in
        the right order.

        This does not include columns that only need to be included in the
        result rows, and hence could be provided by postprocessors.
        """
        tags: set[ColumnTag] = set(DimensionKeyColumnTag.generate(self.requested.names))
        for dataset_type in self.datasets:
            tags.update(DimensionKeyColumnTag.generate(dataset_type.dimensions.names))
        if self.where.expression_predicate is not None:
            tags.update(self.where.expression_predicate.columns_required)
        if self.order_by is not None:
            tags.update(self.order_by.columns_required)
        region = self.where.region
        for dimension_name in self.where.data_id.dimensions.names:
            dimension_tag = DimensionKeyColumnTag(dimension_name)
            if dimension_tag in tags:
                continue
            if skypix_dimension := self.universe.skypix_dimensions.get(dimension_name):
                if skypix_dimension == self.universe.commonSkyPix:
                    # The common skypix dimension should be available from
                    # spatial-join tables.
                    tags.add(dimension_tag)
                else:
                    # This is a SkyPixDimension other than the common one. If
                    # it's not already present in the query (e.g. from a
                    # dataset join), this is a pure spatial constraint, which
                    # we can only apply by modifying the 'region' for the
                    # query. That will also require that we join in the common
                    # skypix dimension.
                    pixel = skypix_dimension.pixelization.pixel(self.where.data_id[dimension_name])
                    if region is None:
                        region = pixel
                    else:
                        region = IntersectionRegion(region, pixel)
            else:
                # If a dimension in the data ID is available from dimension
                # tables or dimension spatial-join tables in the database,
                # include it in the set of dimensions whose tables should
                # be joined. This makes these data ID constraints work
                # just like simple 'where' constraints, which is good.
                tags.add(dimension_tag)
        # Make sure the dimension keys are expanded self-consistently in what
        # we return by passing them through DimensionGroup.
        dimensions = DimensionGroup(
            self.universe, names={tag.dimension for tag in DimensionKeyColumnTag.filter_from(tags)}
        )
        # If we have a region constraint, ensure region columns and the common
        # skypix dimension are included.
        missing_common_skypix = False
        if region is not None:
            for family in dimensions.spatial:
                element = family.choose(dimensions.elements.names, self.universe)
                tags.add(DimensionRecordColumnTag(element.name, "region"))
                if (
                    not isinstance(element, SkyPixDimension)
                    and self.universe.commonSkyPix.name not in dimensions
                ):
                    missing_common_skypix = True
        if missing_common_skypix:
            dimensions = dimensions.union(self.universe.commonSkyPix.minimal_group)
        tags.update(DimensionKeyColumnTag.generate(dimensions.names))
        return (tags, dimensions, region)
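
# Illustrative sketch, not part of the original source: how a skypix data ID
# value becomes a spatial constraint, using lsst.sphgeom directly.  The HTM
# level-7 pixelization and the pixel index here are arbitrary examples, not
# the configured common skypix system.
#
#     from lsst.sphgeom import Angle, Circle, HtmPixelization, IntersectionRegion, UnitVector3d
#
#     pixelization = HtmPixelization(7)
#     pixel = pixelization.pixel(131072)  # region covered by one pixel index
#     external = Circle(UnitVector3d(1.0, 0.0, 0.0), Angle.fromDegrees(1.0))
#     constraint = IntersectionRegion(external, pixel)  # rows must overlap both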