Coverage for python/lsst/daf/butler/queries/result_specs.py: 41%
127 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-13 09:58 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "ResultSpecBase",
32 "DataCoordinateResultSpec",
33 "DimensionRecordResultSpec",
34 "DatasetRefResultSpec",
35)
37from abc import ABC, abstractmethod
38from collections.abc import Mapping
39from typing import Annotated, Literal, TypeAlias, cast
41import pydantic
43from ..dimensions import DimensionElement, DimensionGroup
44from .tree import ColumnSet, DatasetFieldName, InvalidQueryError, OrderExpression, QueryTree
class ResultSpecBase(pydantic.BaseModel, ABC):
    """Base class for all query-result specification objects.

    A result specification is a struct that is combined with a `QueryTree` to
    represent a serializable query-results object.
    """

    result_type: str
    """String literal that corresponds to a concrete derived type."""

    order_by: tuple[OrderExpression, ...] = ()
    """Expressions to sort the rows by."""

    limit: int | None = None
    """Maximum number of rows to return, or `None` for no bound."""

    def validate_tree(self, tree: QueryTree) -> None:
        """Check that this result object is consistent with a query tree.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree that defines the joins and row-filtering that these
            results will come from.
        """
        this = cast(ResultSpec, self)
        # The result dimensions must be a (non-strict) subset of the
        # dimensions the tree actually joins in.
        if not this.dimensions <= tree.dimensions:
            raise InvalidQueryError(
                f"Query result specification has dimensions {this.dimensions} that are not a subset of the "
                f"query's dimensions {tree.dimensions}."
            )
        columns = this.get_result_columns()
        assert columns.dimensions == this.dimensions, "enforced by ResultSpec implementations"
        # Any dataset type whose fields appear in the results must have been
        # joined into the query.
        for dataset_type in columns.dataset_fields:
            if dataset_type not in tree.datasets:
                raise InvalidQueryError(f"Dataset {dataset_type!r} is not available from this query.")
        # Gather the columns referenced by the order-by terms and apply the
        # same dimension-subset and dataset-availability checks to them.
        sort_columns = ColumnSet(this.dimensions)
        for expression in this.order_by:
            expression.gather_required_columns(sort_columns)
        if not sort_columns.dimensions <= this.dimensions:
            raise InvalidQueryError(
                "Order-by expression may not reference columns that are not in the result dimensions."
            )
        for dataset_type in sort_columns.dataset_fields.keys():
            if dataset_type not in tree.datasets:
                raise InvalidQueryError(
                    f"Dataset type {dataset_type!r} in order-by expression is not part of the query."
                )

    @property
    def find_first_dataset(self) -> str | None:
        """The dataset type for which find-first resolution is required, if
        any.
        """
        return None

    @abstractmethod
    def get_result_columns(self) -> ColumnSet:
        """Return the columns included in the actual result rows.

        This does not necessarily include all columns required by the
        `order_by` terms that are also a part of this spec.
        """
        raise NotImplementedError()
class DataCoordinateResultSpec(ResultSpecBase):
    """Specification for a query that yields `DataCoordinate` objects."""

    result_type: Literal["data_coordinate"] = "data_coordinate"

    dimensions: DimensionGroup
    """The dimensions of the data IDs returned by this query."""

    include_dimension_records: bool = False
    """Whether the returned data IDs include dimension records."""

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        if not self.include_dimension_records:
            return columns
        universe = self.dimensions.universe
        # Include the non-key record fields of each element, skipping those
        # that are cached client-side and skypix dimensions (whose records
        # are computed rather than stored).
        for name in self.dimensions.elements:
            element = universe[name]
            if element.is_cached or element in universe.skypix_dimensions:
                continue
            columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class DimensionRecordResultSpec(ResultSpecBase):
    """Specification for a query that yields `DimensionRecord` objects."""

    result_type: Literal["dimension_record"] = "dimension_record"

    element: DimensionElement
    """The name and definition of the dimension records returned by this query.
    """

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that are required or implied (directly or indirectly)
        by this dimension element.
        """
        return self.element.minimal_group

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        element = self.element
        columns = ColumnSet(element.minimal_group)
        # Skypix elements have no stored record fields beyond their key.
        if element not in self.dimensions.universe.skypix_dimensions:
            columns.dimension_fields[element.name].update(element.schema.remainder.names)
        # Drop dimension keys pulled in by the minimal group that are not
        # part of the element's own record.
        columns.drop_dimension_keys(element.minimal_group.names - element.dimensions.names)
        return columns
class DatasetRefResultSpec(ResultSpecBase):
    """Specification for a query that yields `DatasetRef` objects."""

    result_type: Literal["dataset_ref"] = "dataset_ref"

    dataset_type_name: str
    """The dataset type name of the datasets returned by this query."""

    dimensions: DimensionGroup
    """The dimensions of the datasets returned by this query."""

    storage_class_name: str
    """The name of the storage class of the datasets returned by this query."""

    include_dimension_records: bool = False
    """Whether the data IDs returned by this query include dimension records.
    """

    find_first: bool
    """Whether this query should resolve data ID duplicates according to the
    order of the collections to be searched.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if self.find_first:
            return self.dataset_type_name
        return None

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        # A DatasetRef always needs its UUID and the run collection it was
        # written to.
        columns.dataset_fields[self.dataset_type_name].update(("dataset_id", "run"))
        if self.include_dimension_records:
            universe = self.dimensions.universe
            # Same record-field expansion as DataCoordinateResultSpec: skip
            # cached elements and skypix dimensions.
            for name in self.dimensions.elements:
                element = universe[name]
                if element.is_cached or element in universe.skypix_dimensions:
                    continue
                columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class GeneralResultSpec(ResultSpecBase):
    """Specification for a query that yields a table with
    an explicit list of columns.
    """

    result_type: Literal["general"] = "general"

    dimensions: DimensionGroup
    """The dimensions that span all fields returned by this query."""

    dimension_fields: Mapping[str, set[str]]
    """Dimension record fields included in this query."""

    dataset_fields: Mapping[str, set[DatasetFieldName]]
    """Dataset fields included in this query."""

    find_first: bool
    """Whether this query requires find-first resolution for a dataset.

    This can only be `True` if exactly one dataset type's fields are included
    in the results.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if not self.find_first:
            return None
        # _validate guarantees exactly one dataset type when find_first is
        # set, so this unpacking cannot fail.
        (dataset_type,) = self.dataset_fields
        return dataset_type

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        for element_name, element_fields in self.dimension_fields.items():
            columns.dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_field_set in self.dataset_fields.items():
            columns.dataset_fields[dataset_type].update(dataset_field_set)
        return columns

    @pydantic.model_validator(mode="after")
    def _validate(self) -> GeneralResultSpec:
        # find_first is only meaningful for a single result dataset type.
        if self.find_first and len(self.dataset_fields) != 1:
            raise InvalidQueryError("find_first=True requires exactly one result dataset type.")
        skypix_names = self.dimensions.universe.skypix_dimensions.names
        for element_name, element_fields in self.dimension_fields.items():
            if element_name not in self.dimensions.elements:
                raise InvalidQueryError(f"Dimension element {element_name} is not in {self.dimensions}.")
            if not element_fields:
                raise InvalidQueryError(
                    f"Empty dimension element field set for {element_name!r} is not permitted."
                )
            if element_name in skypix_names:
                raise InvalidQueryError(
                    f"Regions for skypix dimension {element_name!r} are not stored; compute them via "
                    f"{element_name}.pixelization.pixel(id) instead."
                )
        for dataset_type, dataset_field_set in self.dataset_fields.items():
            if not dataset_field_set:
                raise InvalidQueryError(f"Empty dataset field set for {dataset_type!r} is not permitted.")
        return self
# Serializable union of every concrete result specification; pydantic uses
# the ``result_type`` literal as a discriminator to pick the right model
# when validating/deserializing.
ResultSpec: TypeAlias = Annotated[
    DataCoordinateResultSpec | DimensionRecordResultSpec | DatasetRefResultSpec | GeneralResultSpec,
    pydantic.Field(discriminator="result_type"),
]