Coverage for python/lsst/daf/butler/queries/result_specs.py: 41%

127 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 02:03 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ( 

31 "ResultSpecBase", 

32 "DataCoordinateResultSpec", 

33 "DimensionRecordResultSpec", 

34 "DatasetRefResultSpec", 

35) 

36 

37from abc import ABC, abstractmethod 

38from collections.abc import Mapping 

39from typing import Annotated, Literal, TypeAlias, cast 

40 

41import pydantic 

42 

43from ..dimensions import DimensionElement, DimensionGroup 

44from .tree import ColumnSet, DatasetFieldName, InvalidQueryError, OrderExpression, QueryTree 

45 

46 

class ResultSpecBase(pydantic.BaseModel, ABC):
    """Base class for all query-result specification objects.

    A result specification is a struct that is combined with a `QueryTree` to
    represent a serializable query-results object.
    """

    result_type: str
    """String literal that corresponds to a concrete derived type."""

    order_by: tuple[OrderExpression, ...] = ()
    """Expressions to sort the rows by."""

    limit: int | None = None
    """Maximum number of rows to return, or `None` for no bound."""

    def validate_tree(self, tree: QueryTree) -> None:
        """Check that this result object is consistent with a query tree.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree that defines the joins and row-filtering that these
            results will come from.
        """
        spec = cast(ResultSpec, self)
        # Result rows cannot cover dimensions the query never joined in.
        if not spec.dimensions <= tree.dimensions:
            raise InvalidQueryError(
                f"Query result specification has dimensions {spec.dimensions} that are not a subset of the "
                f"query's dimensions {tree.dimensions}."
            )
        columns = spec.get_result_columns()
        assert columns.dimensions == spec.dimensions, "enforced by ResultSpec implementations"
        # Any dataset type whose fields appear in the results must have been
        # joined into the query tree.
        for name in columns.dataset_fields:
            if name not in tree.datasets:
                raise InvalidQueryError(f"Dataset {name!r} is not available from this query.")
        self._validate_order_by(spec, tree)

    def _validate_order_by(self, spec: ResultSpec, tree: QueryTree) -> None:
        # Collect every column referenced by the order-by terms and check
        # that each one is actually available from the query.
        referenced = ColumnSet(spec.dimensions)
        for expression in spec.order_by:
            expression.gather_required_columns(referenced)
        if not (referenced.dimensions <= spec.dimensions):
            raise InvalidQueryError(
                "Order-by expression may not reference columns that are not in the result dimensions."
            )
        for name in referenced.dataset_fields.keys():
            if name not in tree.datasets:
                raise InvalidQueryError(
                    f"Dataset type {name!r} in order-by expression is not part of the query."
                )

    @property
    def find_first_dataset(self) -> str | None:
        """The dataset type for which find-first resolution is required, if
        any.
        """
        return None

    @abstractmethod
    def get_result_columns(self) -> ColumnSet:
        """Return the columns included in the actual result rows.

        This does not necessarily include all columns required by the
        `order_by` terms that are also a part of this spec.
        """
        raise NotImplementedError()

111 

112 

class DataCoordinateResultSpec(ResultSpecBase):
    """Specification for a query that yields `DataCoordinate` objects."""

    result_type: Literal["data_coordinate"] = "data_coordinate"

    dimensions: DimensionGroup
    """The dimensions of the data IDs returned by this query."""

    include_dimension_records: bool = False
    """Whether the returned data IDs include dimension records."""

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        if not self.include_dimension_records:
            return columns
        universe = self.dimensions.universe
        for name in self.dimensions.elements:
            element = universe[name]
            # Cached elements and skypix dimensions do not contribute
            # record fields to the result columns.
            if element.is_cached or element in universe.skypix_dimensions:
                continue
            columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns

133 

134 

class DimensionRecordResultSpec(ResultSpecBase):
    """Specification for a query that yields `DimensionRecord` objects."""

    result_type: Literal["dimension_record"] = "dimension_record"

    element: DimensionElement
    """The name and definition of the dimension records returned by this query.
    """

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that are required or implied (directly or indirectly)
        by this dimension element.
        """
        return self.element.minimal_group

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        element = self.element
        columns = ColumnSet(element.minimal_group)
        # Skypix dimensions contribute no stored record fields.
        if element not in self.dimensions.universe.skypix_dimensions:
            columns.dimension_fields[element.name].update(element.schema.remainder.names)
        # Keep only the keys of the element's own dimensions, dropping those
        # present merely because they belong to the minimal group.
        columns.drop_dimension_keys(element.minimal_group.names - element.dimensions.names)
        return columns

158 

159 

class DatasetRefResultSpec(ResultSpecBase):
    """Specification for a query that yields `DatasetRef` objects."""

    result_type: Literal["dataset_ref"] = "dataset_ref"

    dataset_type_name: str
    """The dataset type name of the datasets returned by this query."""

    dimensions: DimensionGroup
    """The dimensions of the datasets returned by this query."""

    storage_class_name: str
    """The name of the storage class of the datasets returned by this query."""

    include_dimension_records: bool = False
    """Whether the data IDs returned by this query include dimension records.
    """

    find_first: bool
    """Whether this query should resolve data ID duplicates according to the
    order of the collections to be searched.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if self.find_first:
            return self.dataset_type_name
        return None

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        # The dataset_id and run fields are always needed to build refs.
        columns.dataset_fields[self.dataset_type_name].update({"dataset_id", "run"})
        if self.include_dimension_records:
            universe = self.dimensions.universe
            for name in self.dimensions.elements:
                element = universe[name]
                # Cached elements and skypix dimensions do not contribute
                # record fields to the result columns.
                if element.is_cached or element in universe.skypix_dimensions:
                    continue
                columns.dimension_fields[name].update(element.schema.remainder.names)
        return columns

198 

199 

class GeneralResultSpec(ResultSpecBase):
    """Specification for a query that yields a table with
    an explicit list of columns.
    """

    result_type: Literal["general"] = "general"

    dimensions: DimensionGroup
    """The dimensions that span all fields returned by this query."""

    dimension_fields: Mapping[str, set[str]]
    """Dimension record fields included in this query."""

    dataset_fields: Mapping[str, set[DatasetFieldName]]
    """Dataset fields included in this query."""

    find_first: bool
    """Whether this query requires find-first resolution for a dataset.

    This can only be `True` if exactly one dataset type's fields are included
    in the results.
    """

    @property
    def find_first_dataset(self) -> str | None:
        # Docstring inherited.
        if not self.find_first:
            return None
        # The model validator guarantees exactly one entry when find_first.
        (dataset_type,) = self.dataset_fields.keys()
        return dataset_type

    def get_result_columns(self) -> ColumnSet:
        # Docstring inherited.
        columns = ColumnSet(self.dimensions)
        for element_name, element_fields in self.dimension_fields.items():
            columns.dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_type_fields in self.dataset_fields.items():
            columns.dataset_fields[dataset_type].update(dataset_type_fields)
        return columns

    @pydantic.model_validator(mode="after")
    def _validate(self) -> GeneralResultSpec:
        # Cross-field consistency checks that pydantic's per-field validation
        # cannot express.
        if self.find_first and len(self.dataset_fields) != 1:
            raise InvalidQueryError("find_first=True requires exactly one result dataset type.")
        for element_name, fields_for_element in self.dimension_fields.items():
            if element_name not in self.dimensions.elements:
                raise InvalidQueryError(f"Dimension element {element_name} is not in {self.dimensions}.")
            if not fields_for_element:
                raise InvalidQueryError(
                    f"Empty dimension element field set for {element_name!r} is not permitted."
                )
            if element_name in self.dimensions.universe.skypix_dimensions.names:
                raise InvalidQueryError(
                    f"Regions for skypix dimension {element_name!r} are not stored; compute them via "
                    f"{element_name}.pixelization.pixel(id) instead."
                )
        for dataset_type, fields_for_dataset in self.dataset_fields.items():
            if not fields_for_dataset:
                raise InvalidQueryError(f"Empty dataset field set for {dataset_type!r} is not permitted.")
        return self

260 

261 

# Discriminated union of all concrete result specifications; the
# `result_type` literal field selects the concrete subclass when pydantic
# validates/deserializes a `ResultSpec`.
ResultSpec: TypeAlias = Annotated[
    DataCoordinateResultSpec | DimensionRecordResultSpec | DatasetRefResultSpec | GeneralResultSpec,
    pydantic.Field(discriminator="result_type"),
]