Coverage for python/lsst/daf/butler/queries/result_specs.py: 43%
126 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-07 11:04 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "ResultSpecBase",
32 "DataCoordinateResultSpec",
33 "DimensionRecordResultSpec",
34 "DatasetRefResultSpec",
35)
37from abc import ABC, abstractmethod
38from collections.abc import Mapping
39from typing import Annotated, Literal, TypeAlias, Union, cast
41import pydantic
43from ..dimensions import DimensionElement, DimensionGroup
44from .tree import ColumnSet, DatasetFieldName, InvalidQueryError, OrderExpression, QueryTree
class ResultSpecBase(pydantic.BaseModel, ABC):
    """Abstract base for all query-result specification objects.

    A result specification is a struct that, together with a `QueryTree`,
    forms a serializable description of a query-results object.
    """

    result_type: str
    """String literal identifying the concrete derived type."""

    order_by: tuple[OrderExpression, ...] = ()
    """Expressions used to sort the result rows."""

    offset: int = 0
    """Index of the first row to return."""

    limit: int | None = None
    """Maximum number of rows to return, or `None` for no bound."""

    def validate_tree(self, tree: QueryTree) -> None:
        """Check that this result object is consistent with a query tree.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree that defines the joins and row-filtering that these
            results will come from.
        """
        spec = cast(ResultSpec, self)
        # The result dimensions must be a subset of what the tree provides.
        if not spec.dimensions <= tree.dimensions:
            raise InvalidQueryError(
                f"Query result specification has dimensions {spec.dimensions} that are not a subset of the "
                f"query's dimensions {tree.dimensions}."
            )
        columns = spec.get_result_columns()
        assert columns.dimensions == spec.dimensions, "enforced by ResultSpec implementations"
        # Every dataset type whose fields appear in the results must have
        # been joined into the query.
        for name in columns.dataset_fields:
            if name not in tree.datasets:
                raise InvalidQueryError(f"Dataset {name!r} is not available from this query.")
        # Gather the columns referenced by the order-by terms and apply the
        # same subset checks to them.
        needed = ColumnSet(spec.dimensions)
        for expression in spec.order_by:
            expression.gather_required_columns(needed)
        if not needed.dimensions <= spec.dimensions:
            raise InvalidQueryError(
                "Order-by expression may not reference columns that are not in the result dimensions."
            )
        for name in needed.dataset_fields.keys():
            if name not in tree.datasets:
                raise InvalidQueryError(
                    f"Dataset type {name!r} in order-by expression is not part of the query."
                )

    @property
    def find_first_dataset(self) -> str | None:
        """The dataset type for which find-first resolution is required, if
        any.
        """
        # Base implementation: no find-first resolution needed.
        return None

    @abstractmethod
    def get_result_columns(self) -> ColumnSet:
        """Return the columns included in the actual result rows.

        This does not necessarily include all columns required by the
        `order_by` terms that are also a part of this spec.
        """
        raise NotImplementedError()
class DataCoordinateResultSpec(ResultSpecBase):
    """Specification for a query that yields `DataCoordinate` objects."""

    result_type: Literal["data_coordinate"] = "data_coordinate"

    dimensions: DimensionGroup
    """The dimensions of the data IDs returned by this query."""

    include_dimension_records: bool = False
    """Whether the returned data IDs include dimension records."""

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        if self.include_dimension_records:
            universe = self.dimensions.universe
            # Only non-cached elements need their record fields fetched as
            # part of the query itself.
            for name in self.dimensions.elements:
                element = universe[name]
                if not element.is_cached:
                    columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class DimensionRecordResultSpec(ResultSpecBase):
    """Specification for a query that yields `DimensionRecord` objects."""

    result_type: Literal["dimension_record"] = "dimension_record"

    element: DimensionElement
    """The name and definition of the dimension records returned by this query.
    """

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that are required or implied (directly or indirectly)
        by this dimension element.
        """
        return self.element.minimal_group

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        element = self.element
        columns = ColumnSet(element.minimal_group)
        columns.dimension_fields[element.name].update(element.schema.remainder.names)
        # Drop key columns for dimensions that are only in the minimal group
        # transitively, not part of the element's own dimensions.
        columns.drop_dimension_keys(element.minimal_group.names - element.dimensions.names)
        return columns
class DatasetRefResultSpec(ResultSpecBase):
    """Specification for a query that yields `DatasetRef` objects."""

    result_type: Literal["dataset_ref"] = "dataset_ref"

    dataset_type_name: str
    """The dataset type name of the datasets returned by this query."""

    dimensions: DimensionGroup
    """The dimensions of the datasets returned by this query."""

    storage_class_name: str
    """The name of the storage class of the datasets returned by this query."""

    include_dimension_records: bool = False
    """Whether the data IDs returned by this query include dimension records.
    """

    find_first: bool
    """Whether this query should resolve data ID duplicates according to the
    order of the collections to be searched.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if self.find_first:
            return self.dataset_type_name
        return None

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        # A DatasetRef always needs the dataset's UUID and RUN collection.
        columns.dataset_fields[self.dataset_type_name].update({"dataset_id", "run"})
        if self.include_dimension_records:
            universe = self.dimensions.universe
            # Only non-cached elements need their record fields fetched as
            # part of the query itself.
            for name in self.dimensions.elements:
                element = universe[name]
                if not element.is_cached:
                    columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns
class GeneralResultSpec(ResultSpecBase):
    """Specification for a query that yields a table with
    an explicit list of columns.
    """

    result_type: Literal["general"] = "general"

    dimensions: DimensionGroup
    """The dimensions that span all fields returned by this query."""

    dimension_fields: Mapping[str, set[str]]
    """Dimension record fields included in this query."""

    dataset_fields: Mapping[str, set[DatasetFieldName]]
    """Dataset fields included in this query."""

    find_first: bool
    """Whether this query requires find-first resolution for a dataset.

    This can only be `True` if exactly one dataset type's fields are included
    in the results.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if not self.find_first:
            return None
        # The model validator guarantees exactly one entry when
        # find_first is set.
        (name,) = self.dataset_fields.keys()
        return name

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        for element_name, element_fields in self.dimension_fields.items():
            columns.dimension_fields[element_name].update(element_fields)
        for dataset_type_name, fields in self.dataset_fields.items():
            columns.dataset_fields[dataset_type_name].update(fields)
        return columns

    @pydantic.model_validator(mode="after")
    def _validate(self) -> GeneralResultSpec:
        # find-first resolution is only well-defined for a single dataset
        # type in the results.
        if self.find_first and len(self.dataset_fields) != 1:
            raise InvalidQueryError("find_first=True requires exactly one result dataset type.")
        for element_name, element_fields in self.dimension_fields.items():
            if element_name not in self.dimensions.elements:
                raise InvalidQueryError(f"Dimension element {element_name} is not in {self.dimensions}.")
            if not element_fields:
                raise InvalidQueryError(
                    f"Empty dimension element field set for {element_name!r} is not permitted."
                )
        for dataset_type_name, fields in self.dataset_fields.items():
            if not fields:
                raise InvalidQueryError(f"Empty dataset field set for {dataset_type_name!r} is not permitted.")
        return self
# Discriminated union of all concrete result-spec types; pydantic uses the
# ``result_type`` literal field to pick the right class when deserializing.
ResultSpec: TypeAlias = Annotated[
    Union[DataCoordinateResultSpec, DimensionRecordResultSpec, DatasetRefResultSpec, GeneralResultSpec],
    pydantic.Field(discriminator="result_type"),
]