Coverage for python/lsst/daf/butler/registry/queries/_structs.py: 40%
166 statements
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["QuerySummary"]  # other classes here are local to subpackage

import dataclasses
from collections.abc import Iterable, Mapping, Set
from typing import Any

import astropy.time
from lsst.daf.relation import ColumnExpression, ColumnTag, Predicate, SortTerm
from lsst.sphgeom import IntersectionRegion, Region
from lsst.utils.classes import cached_getter, immutable

from ...core import (
    ColumnTypeInfo,
    DataCoordinate,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecordColumnTag,
    DimensionUniverse,
    NamedValueAbstractSet,
    NamedValueSet,
    SkyPixDimension,
)

# We're not trying to add typing to the lex/yacc parser code, so MyPy
# doesn't know about some of these imports.
from .expressions import make_string_expression_predicate
from .expressions.categorize import categorizeElementOrderByName, categorizeOrderByName


@dataclasses.dataclass(frozen=True, eq=False)
class QueryWhereClause:
    """Structure holding various contributions to a query's WHERE clause.

    Instances of this class should only be created by
    `QueryWhereClause.combine`, which guarantees the consistency of its
    attributes.
    """

    @classmethod
    def combine(
        cls,
        dimensions: DimensionGraph,
        expression: str = "",
        *,
        column_types: ColumnTypeInfo,
        bind: Mapping[str, Any] | None = None,
        data_id: DataCoordinate | None = None,
        region: Region | None = None,
        defaults: DataCoordinate | None = None,
        dataset_type_name: str | None = None,
        allow_orphans: bool = False,
    ) -> QueryWhereClause:
        """Construct from various components.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            The dimensions that would be included in the query in the absence
            of the WHERE clause.
        expression : `str`, optional
            A user-provided string expression.
        column_types : `ColumnTypeInfo`
            Information about column types.
        bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
            Mapping containing literal values that should be injected into the
            query expression, keyed by the identifiers they replace.
        data_id : `DataCoordinate`, optional
            A data ID identifying dimensions known in advance. If not
            provided, will be set to an empty data ID.
        region : `lsst.sphgeom.Region`, optional
            A spatial constraint that all rows must overlap.
        defaults : `DataCoordinate`, optional
            A data ID containing defaults for governor dimensions.
        dataset_type_name : `str` or `None`, optional
            The name of the dataset type to assume for unqualified dataset
            columns, or `None` if there are no such identifiers.
        allow_orphans : `bool`, optional
            If `True`, permit expressions to refer to dimensions without
            providing a value for their governor dimensions (e.g. referring to
            a visit without an instrument). Should be left to default to
            `False` in essentially all new code.

        Returns
        -------
        where : `QueryWhereClause`
            An object representing the WHERE clause for a query.
        """
        if data_id is None:
            data_id = DataCoordinate.makeEmpty(dimensions.universe)
        if defaults is None:
            defaults = DataCoordinate.makeEmpty(dimensions.universe)
        expression_predicate, governor_constraints = make_string_expression_predicate(
            expression,
            dimensions,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=allow_orphans,
        )
        return QueryWhereClause(
            expression_predicate,
            data_id,
            region=region,
            governor_constraints=governor_constraints,
        )

    expression_predicate: Predicate | None
    """A predicate that evaluates a string expression from the user
    (`expressions.Predicate` or `None`).
    """

    data_id: DataCoordinate
    """A data ID identifying dimensions known before query construction
    (`DataCoordinate`).
    """

    region: Region | None
    """A spatial region that all result rows must overlap
    (`lsst.sphgeom.Region` or `None`).
    """

    governor_constraints: Mapping[str, Set[str]]
    """Restrictions on the values governor dimensions can take in this query,
    imposed by the string expression and/or data ID
    (`~collections.abc.Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]).

    Governor dimensions not present in this mapping are not constrained at all.
    """


@dataclasses.dataclass(frozen=True)
class OrderByClauseColumn:
    """Information about a single column in an ORDER BY clause."""

    element: DimensionElement
    """Dimension element for data in this column (`DimensionElement`)."""

    column: str | None
    """Name of the column, or `None` for the primary key (`str` or `None`)."""

    ordering: bool
    """True for ascending order, False for descending (`bool`)."""


@dataclasses.dataclass(frozen=True, eq=False)
class OrderByClause:
    """Class for information about columns in an ORDER BY clause."""

    @classmethod
    def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderByClause:
        """Parse an iterable of strings in the context of a multi-dimension
        query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        graph : `DimensionGraph`
            Dimensions used by a query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            element, column = categorizeOrderByName(graph, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)

    @classmethod
    def parse_element(cls, order_by: Iterable[str], element: DimensionElement) -> OrderByClause:
        """Parse an iterable of strings in the context of a single dimension
        element query.

        Parameters
        ----------
        order_by : `~collections.abc.Iterable` [ `str` ]
            Sequence of names to use for ordering with optional "-" prefix.
        element : `DimensionElement`
            Single or primary dimension element in the query.

        Returns
        -------
        clause : `OrderByClause`
            New order-by clause representing the given string columns.
        """
        terms = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            term = cls._make_term(element, column, ascending)
            terms.append(term)
        return cls(terms)

    @classmethod
    def _make_term(cls, element: DimensionElement, column: str | None, ascending: bool) -> SortTerm:
        """Make a single sort term from parsed user expression values.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the sort term references.
        column : `str` or `None`
            DimensionRecord field name, or `None` if ``element`` is a
            `Dimension` and the sort term is on its key value.
        ascending : `bool`
            Whether to sort ascending (`True`) or descending (`False`).

        Returns
        -------
        term : `lsst.daf.relation.SortTerm`
            Sort term struct.
        """
        tag: ColumnTag
        expression: ColumnExpression
        if column is None:
            tag = DimensionKeyColumnTag(element.name)
            expression = ColumnExpression.reference(tag)
        elif column in ("timespan.begin", "timespan.end"):
            base_column, _, subfield = column.partition(".")
            tag = DimensionRecordColumnTag(element.name, base_column)
            expression = ColumnExpression.reference(tag).method(
                "lower" if subfield == "begin" else "upper", dtype=astropy.time.Time
            )
        else:
            tag = DimensionRecordColumnTag(element.name, column)
            expression = ColumnExpression.reference(tag)
        return SortTerm(expression, ascending)

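    # Example (illustrative sketch): a parsed name like
    # "exposure.timespan.begin" with ``ascending=False`` produces a term
    # equivalent to:
    #
    #     SortTerm(
    #         ColumnExpression.reference(
    #             DimensionRecordColumnTag("exposure", "timespan")
    #         ).method("lower", dtype=astropy.time.Time),
    #         ascending=False,
    #     )
    #
    # i.e. descending order on the lower bound of the exposure's timespan.
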
    terms: Iterable[SortTerm]
    """Sort terms that appear in the ORDER BY clause
    (`~collections.abc.Iterable` [ `lsst.daf.relation.SortTerm` ]).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        """Set of column tags for all columns referenced by the ORDER BY clause
        (`~collections.abc.Set` [ `ColumnTag` ]).
        """
        tags: set[ColumnTag] = set()
        for term in self.terms:
            tags.update(term.expression.columns_required)
        return tags


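# Example (illustrative sketch): assuming `graph` is a `DimensionGraph` that
# includes the visit and exposure dimensions, a clause sorting ascending on
# visit and descending on the start of each exposure's timespan could be
# parsed like this:
#
#     clause = OrderByClause.parse_general(
#         ["visit", "-exposure.timespan.begin"], graph
#     )
#     tags = clause.columns_required  # columns the query must provide

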
@immutable
class ElementOrderByClause:
    """Class for information about columns in an ORDER BY clause for one element.

    Parameters
    ----------
    order_by : `~collections.abc.Iterable` [ `str` ]
        Sequence of names to use for ordering with optional "-" prefix.
    element : `DimensionElement`
        Single or primary dimension element in the query.
    """

    def __init__(self, order_by: Iterable[str], element: DimensionElement):
        self.order_by_columns = []
        for name in order_by:
            if not name or name == "-":
                raise ValueError("Empty dimension name in ORDER BY")
            ascending = True
            if name[0] == "-":
                ascending = False
                name = name[1:]
            column = categorizeElementOrderByName(element, name)
            self.order_by_columns.append(
                OrderByClauseColumn(element=element, column=column, ordering=ascending)
            )

    order_by_columns: Iterable[OrderByClauseColumn]
    """Columns that appear in the ORDER BY
    (`~collections.abc.Iterable` [ `OrderByClauseColumn` ]).
    """


@immutable
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result
        rows of the query.
    column_types : `ColumnTypeInfo`
        Information about column types.
    data_id : `DataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance. If
        not provided, will be set to an empty data ID.
    expression : `str`, optional
        A user-provided string WHERE expression.
    region : `lsst.sphgeom.Region`, optional
        A spatial constraint that all rows must overlap.
    bind : `~collections.abc.Mapping` [ `str`, `object` ], optional
        Mapping containing literal values that should be injected into the
        query expression, keyed by the identifiers they replace.
    defaults : `DataCoordinate`, optional
        A data ID containing defaults for governor dimensions.
    datasets : `~collections.abc.Iterable` [ `DatasetType` ], optional
        Dataset types whose searches may be joined into the query. Callers
        must still call `QueryBuilder.joinDataset` explicitly to control how
        that join happens (e.g. which collections are searched), but by
        declaring them here first we can ensure that the query includes the
        right dimensions for those joins.
    order_by : `~collections.abc.Iterable` [ `str` ], optional
        Sequence of names to use for ordering with optional "-" prefix.
    limit : `tuple`, optional
        Limit on the number of returned rows and optional offset.
    check : `bool`, optional
        If `False`, permit expressions to refer to dimensions without
        providing a value for their governor dimensions (e.g. referring to a
        visit without an instrument). Should be left to default to `True` in
        essentially all new code.
    """

    def __init__(
        self,
        requested: DimensionGraph,
        *,
        column_types: ColumnTypeInfo,
        data_id: DataCoordinate | None = None,
        expression: str = "",
        region: Region | None = None,
        bind: Mapping[str, Any] | None = None,
        defaults: DataCoordinate | None = None,
        datasets: Iterable[DatasetType] = (),
        order_by: Iterable[str] | None = None,
        limit: tuple[int, int | None] | None = None,
        check: bool = True,
    ):
        self.requested = requested
        self.datasets = NamedValueSet(datasets).freeze()
        if len(self.datasets) == 1:
            (dataset_type_name,) = self.datasets.names
        else:
            dataset_type_name = None
        self.where = QueryWhereClause.combine(
            self.requested,
            expression=expression,
            column_types=column_types,
            bind=bind,
            data_id=data_id,
            region=region,
            defaults=defaults,
            dataset_type_name=dataset_type_name,
            allow_orphans=not check,
        )
        self.order_by = None if order_by is None else OrderByClause.parse_general(order_by, requested)
        self.limit = limit
        self.columns_required, self.dimensions, self.region = self._compute_columns_required()

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    where: QueryWhereClause
    """Structure containing objects that contribute to the WHERE clause of the
    query (`QueryWhereClause`).
    """

    datasets: NamedValueAbstractSet[DatasetType]
    """Dataset types whose searches may be joined into the query
    (`NamedValueAbstractSet` [ `DatasetType` ]).
    """

    order_by: OrderByClause | None
    """Object that manages how the query results should be sorted
    (`OrderByClause` or `None`).
    """

    limit: tuple[int, int | None] | None
    """Maximum number of rows returned (prior to postprocessing filters) and
    optional integer offset, respectively.
    """

    dimensions: DimensionGraph
    """All dimensions in the query in any form (`DimensionGraph`).
    """

    region: Region | None
    """Region that bounds all query results (`lsst.sphgeom.Region` or
    `None`).

    While `QueryWhereClause.region` and the ``region`` constructor argument
    represent an external region given directly by the caller, this represents
    the region actually used directly as a constraint on the query results,
    which can also come from the data ID passed by the caller.
    """

    columns_required: Set[ColumnTag]
    """All columns that must be included directly in the query.

    This does not include columns that only need to be included in the result
    rows, and hence could be provided by postprocessors.
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`)."""
        return self.requested.universe

    def _compute_columns_required(
        self,
    ) -> tuple[set[ColumnTag], DimensionGraph, Region | None]:
        """Compute the columns that must be provided by the relations joined
        into this query in order to obtain the right *set* of result rows in
        the right order.

        This does not include columns that only need to be included in the
        result rows, and hence could be provided by postprocessors.
        """
        tags: set[ColumnTag] = set(DimensionKeyColumnTag.generate(self.requested.names))
        for dataset_type in self.datasets:
            tags.update(DimensionKeyColumnTag.generate(dataset_type.dimensions.names))
        if self.where.expression_predicate is not None:
            tags.update(self.where.expression_predicate.columns_required)
        if self.order_by is not None:
            tags.update(self.order_by.columns_required)
        region = self.where.region
        for dimension in self.where.data_id.graph:
            dimension_tag = DimensionKeyColumnTag(dimension.name)
            if dimension_tag in tags:
                continue
            if dimension == self.universe.commonSkyPix or not isinstance(dimension, SkyPixDimension):
                # If a dimension in the data ID is available from dimension
                # tables or dimension spatial-join tables in the database,
                # include it in the set of dimensions whose tables should be
                # joined. This makes these data ID constraints work just like
                # simple 'where' constraints, which is good.
                tags.add(dimension_tag)
            else:
                # This is a SkyPixDimension other than the common one. If it's
                # not already present in the query (e.g. from a dataset join),
                # this is a pure spatial constraint, which we can only apply by
                # modifying the 'region' for the query. That will also require
                # that we join in the common skypix dimension.
                pixel = dimension.pixelization.pixel(self.where.data_id[dimension])
                if region is None:
                    region = pixel
                else:
                    region = IntersectionRegion(region, pixel)
        # Make sure the dimension keys are expanded self-consistently in what
        # we return by passing them through DimensionGraph.
        dimensions = DimensionGraph(
            self.universe, names={tag.dimension for tag in DimensionKeyColumnTag.filter_from(tags)}
        )
        # If we have a region constraint, ensure region columns and the common
        # skypix dimension are included.
        missing_common_skypix = False
        if region is not None:
            for family in dimensions.spatial:
                element = family.choose(dimensions.elements)
                tags.add(DimensionRecordColumnTag(element.name, "region"))
                if not isinstance(element, SkyPixDimension) and self.universe.commonSkyPix not in dimensions:
                    missing_common_skypix = True
        if missing_common_skypix:
            dimensions = dimensions.union(self.universe.commonSkyPix.graph)
        tags.update(DimensionKeyColumnTag.generate(dimensions.names))
        return (tags, dimensions, region)
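

# Example (illustrative sketch): assuming `universe` is a `DimensionUniverse`
# and `column_types` is a `ColumnTypeInfo`, a summary for a query over visits
# and detectors, constrained by a string expression and sorted descending by
# visit, could be built like this:
#
#     summary = QuerySummary(
#         universe.extract(["visit", "detector"]),
#         column_types=column_types,
#         expression="instrument = 'HSC' AND visit > 100",
#         order_by=["-visit"],
#         limit=(10, None),
#     )
#
# `summary.dimensions` then covers the requested dimensions plus any others
# the expression requires, and `summary.columns_required` lists the column
# tags the joined relations must provide.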