Coverage for python/lsst/daf/butler/queries/result_specs.py: 42%
132 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
__all__ = (
    "ResultSpecBase",
    "DataCoordinateResultSpec",
    "DimensionRecordResultSpec",
    "DatasetRefResultSpec",
    # The following are public, documented names defined in this module and
    # were previously missing from __all__ (backward-compatible addition).
    "GeneralResultSpec",
    "ResultSpec",
    "SerializedResultSpec",
)
37from abc import ABC, abstractmethod
38from collections.abc import Mapping
39from typing import Annotated, Literal, TypeAlias, cast
41import pydantic
43from .._exceptions import InvalidQueryError
44from ..dimensions import DimensionElement, DimensionGroup, DimensionUniverse
45from ..pydantic_utils import DeferredValidation
46from .tree import ColumnSet, DatasetFieldName, OrderExpression, QueryTree
class ResultSpecBase(pydantic.BaseModel, ABC):
    """Base class for all query-result specification objects.

    A result specification is a struct that is combined with a `QueryTree` to
    represent a serializable query-results object.
    """

    result_type: str
    """String literal that corresponds to a concrete derived type."""

    order_by: tuple[OrderExpression, ...] = ()
    """Expressions to sort the rows by."""

    limit: int | None = None
    """Maximum number of rows to return, or `None` for no bound."""

    def validate_tree(self, tree: QueryTree) -> None:
        """Check that this result object is consistent with a query tree.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree that defines the joins and row-filtering that these
            results will come from.
        """
        spec = cast(ResultSpec, self)
        # The result rows' dimensions must be a subset of the dimensions the
        # query actually joins in.
        if not (spec.dimensions <= tree.dimensions):
            raise InvalidQueryError(
                f"Query result specification has dimensions {spec.dimensions} that are not a subset of the "
                f"query's dimensions {tree.dimensions}."
            )
        columns = spec.get_result_columns()
        assert columns.dimensions == spec.dimensions, "enforced by ResultSpec implementations"
        # Any dataset type whose fields appear in the results must have been
        # joined into the query tree.
        for name in columns.dataset_fields:
            if name not in tree.datasets:
                raise InvalidQueryError(f"Dataset {name!r} is not available from this query.")
        # Gather the columns referenced by order-by terms and apply the same
        # two checks (dimensions subset, dataset availability) to them.
        sort_columns = ColumnSet(spec.dimensions)
        for expression in spec.order_by:
            expression.gather_required_columns(sort_columns)
        if not (sort_columns.dimensions <= spec.dimensions):
            raise InvalidQueryError(
                "Order-by expression may not reference columns that are not in the result dimensions."
            )
        for name in sort_columns.dataset_fields.keys():
            if name not in tree.datasets:
                raise InvalidQueryError(
                    f"Dataset type {name!r} in order-by expression is not part of the query."
                )

    @property
    def find_first_dataset(self) -> str | None:
        """The dataset type for which find-first resolution is required, if
        any.
        """
        # Base implementation: no find-first resolution; overridden by
        # dataset-producing result specs.
        return None

    @abstractmethod
    def get_result_columns(self) -> ColumnSet:
        """Return the columns included in the actual result rows.

        This does not necessarily include all columns required by the
        `order_by` terms that are also a part of this spec.
        """
        raise NotImplementedError()
class DataCoordinateResultSpec(ResultSpecBase):
    """Specification for a query that yields `DataCoordinate` objects."""

    result_type: Literal["data_coordinate"] = "data_coordinate"

    dimensions: DimensionGroup
    """The dimensions of the data IDs returned by this query."""

    include_dimension_records: bool = False
    """Whether the returned data IDs include dimension records."""

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        if self.include_dimension_records:
            universe = self.dimensions.universe
            for name in self.dimensions.elements:
                element = universe[name]
                # Cached elements and skypix dimensions have no record fields
                # to fetch from the database.
                if element.is_cached or element in universe.skypix_dimensions:
                    continue
                columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class DimensionRecordResultSpec(ResultSpecBase):
    """Specification for a query that yields `DimensionRecord` objects."""

    result_type: Literal["dimension_record"] = "dimension_record"

    element: DimensionElement
    """The name and definition of the dimension records returned by this query.
    """

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that are required or implied (directly or indirectly)
        by this dimension element.
        """
        return self.element.minimal_group

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        element = self.element
        columns = ColumnSet(element.minimal_group)
        # Skypix dimensions have no stored record fields.
        if element not in self.dimensions.universe.skypix_dimensions:
            columns.dimension_fields[element.name].update(element.schema.remainder.names)
        # Keep only this element's own key columns in the results.
        columns.drop_dimension_keys(element.minimal_group.names - element.dimensions.names)
        return columns
class DatasetRefResultSpec(ResultSpecBase):
    """Specification for a query that yields `DatasetRef` objects."""

    result_type: Literal["dataset_ref"] = "dataset_ref"

    dataset_type_name: str
    """The dataset type name of the datasets returned by this query."""

    dimensions: DimensionGroup
    """The dimensions of the datasets returned by this query."""

    storage_class_name: str
    """The name of the storage class of the datasets returned by this query."""

    include_dimension_records: bool = False
    """Whether the data IDs returned by this query include dimension records.
    """

    find_first: bool
    """Whether this query should resolve data ID duplicates according to the
    order of the collections to be searched.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if self.find_first:
            return self.dataset_type_name
        return None

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        # A DatasetRef always needs its UUID and RUN collection name.
        columns.dataset_fields[self.dataset_type_name].update({"dataset_id", "run"})
        if self.include_dimension_records:
            universe = self.dimensions.universe
            for name in self.dimensions.elements:
                element = universe[name]
                # Cached elements and skypix dimensions have no record fields
                # to fetch from the database.
                if element.is_cached or element in universe.skypix_dimensions:
                    continue
                columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class GeneralResultSpec(ResultSpecBase):
    """Specification for a query that yields a table with
    an explicit list of columns.
    """

    result_type: Literal["general"] = "general"

    dimensions: DimensionGroup
    """The dimensions that span all fields returned by this query."""

    dimension_fields: Mapping[str, set[str]]
    """Dimension record fields included in this query."""

    dataset_fields: Mapping[str, set[DatasetFieldName]]
    """Dataset fields included in this query."""

    find_first: bool
    """Whether this query requires find-first resolution for a dataset.

    This can only be `True` if exactly one dataset type's fields are included
    in the results.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if not self.find_first:
            return None
        # The validator guarantees there is exactly one dataset type here.
        (dataset_type,) = self.dataset_fields
        return dataset_type

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        for element_name, element_fields in self.dimension_fields.items():
            columns.dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_field_set in self.dataset_fields.items():
            columns.dataset_fields[dataset_type].update(dataset_field_set)
        return columns

    @pydantic.model_validator(mode="after")
    def _validate(self) -> GeneralResultSpec:
        # find-first resolution is only well-defined for a single dataset type.
        if self.find_first and len(self.dataset_fields) != 1:
            raise InvalidQueryError("find_first=True requires exactly one result dataset type.")
        for element_name, element_fields in self.dimension_fields.items():
            if element_name not in self.dimensions.elements:
                raise InvalidQueryError(f"Dimension element {element_name} is not in {self.dimensions}.")
            if not element_fields:
                raise InvalidQueryError(
                    f"Empty dimension element field set for {element_name!r} is not permitted."
                )
            if element_name in self.dimensions.universe.skypix_dimensions.names:
                raise InvalidQueryError(
                    f"Regions for skypix dimension {element_name!r} are not stored; compute them via "
                    f"{element_name}.pixelization.pixel(id) instead."
                )
        for dataset_type, dataset_field_set in self.dataset_fields.items():
            if not dataset_field_set:
                raise InvalidQueryError(f"Empty dataset field set for {dataset_type!r} is not permitted.")
        return self
# Union of all concrete result-spec types.  Pydantic uses the `result_type`
# string literal (fixed to a distinct value by each subclass) as the
# discriminator when validating/serializing this union.
ResultSpec: TypeAlias = Annotated[
    DataCoordinateResultSpec | DimensionRecordResultSpec | DatasetRefResultSpec | GeneralResultSpec,
    pydantic.Field(discriminator="result_type"),
]
class SerializedResultSpec(DeferredValidation[ResultSpec]):
    """Deferred-validation wrapper around a serialized `ResultSpec`."""

    def to_result_spec(self, universe: DimensionUniverse) -> ResultSpec:
        """Validate the serialized form and return a `ResultSpec`.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Dimension definitions, passed to validation as context.
        """
        spec: ResultSpec = self.validated(universe=universe)
        return spec