Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 7%

102 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:50 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = () # all symbols intentionally private; for internal package use. 

30 

31import enum 

32from typing import cast 

33 

34from ....dimensions import Dimension, DimensionElement, DimensionGroup, DimensionUniverse 

35 

36 

class ExpressionConstant(enum.Enum):
    """Global constants that may appear as identifiers in any expression.

    Each member's value is the lower-case spelling of the identifier, so a
    member can be looked up by value from a lower-cased identifier string.
    """

    # The literal ``null`` identifier.
    NULL = "null"

    # The ``ingest_date`` identifier.
    INGEST_DATE = "ingest_date"

42 

43 

def categorizeConstant(name: str) -> ExpressionConstant | None:
    """Map an expression identifier onto one of the global constants.

    Parameters
    ----------
    name : `str`
        Identifier to look up.  It is lower-cased before comparison, so the
        match is case-insensitive.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The member whose value equals the lower-cased identifier, or `None`
        if no member matches.
    """
    lowered = name.lower()
    # Search members by value; a miss yields `None` rather than an exception.
    return next((constant for constant in ExpressionConstant if constant.value == lowered), None)

63 

64 

def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare name (taken to mean the primary key of
    the object registered under that name) or ``<element>.<column>``, split
    at the first dot.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions; indexed by element name.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a column in the table for ``element``, or `None` when the
        identifier is a bare name or names the primary key of a `Dimension`.

    Raises
    ------
    LookupError
        Raised if the name before the dot (or the bare name) is not found in
        ``universe``.
    RuntimeError
        Raised if the column part is the name of one of the element's
        dimensions (e.g. ``patch.tract``).  The message suggests how to
        rewrite the expression, so it should be propagated to a context
        where a human can read it.
    """
    table, _, column = name.partition(".")

    if not column:
        # Bare identifier: whatever the universe holds under that name,
        # referenced through its primary key.
        try:
            return universe[table], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err

    try:
        element = universe[table]
    except KeyError:
        if table in ("timespan", "datetime", "timestamp"):
            # These prefixes are not element names; point the user at the
            # fully-qualified spelling.
            raise LookupError(
                "Dimension element name cannot be inferred in this context; "
                f"use <dimension>.timespan.{column} instead."
            ) from None
        raise LookupError(f"No dimension element with name {table!r} in {name!r}.") from None

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # "visit.id = x" is accepted as a clearer spelling of "visit = x".
        return element, None

    if column not in element.dimensions.names:
        # Ordinary (non-key) column; it is passed through without validation.
        return element, column

    # Something like "patch.tract = x" or "tract.tract = x": needlessly
    # confusing and possibly not a real column, though the intent can be
    # guessed.  Ask the user to use "tract" or "tract.id" instead.
    name = universe[column].primaryKey.name  # type: ignore
    raise RuntimeError(
        f"Invalid reference to '{table}.{column}' "
        f"in expression; please use '{column}' or "
        f"'{column}.{name}' instead."
    )

133 

134 

def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an ORDER BY identifier to a dimension element and field.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions whose elements are searched for the identifier.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a field of ``element``, or `None` if the identifier is an
        element name or refers to the primary key of a `Dimension`.

    Raises
    ------
    ValueError
        Raised if the element name is not found in ``dimensions``, if the
        field name is not recognized, or if an unqualified name matches more
        than one element.

    Notes
    -----
    ORDER BY identifiers follow slightly different rules from those in
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``; it is then searched for in all elements of
      ``dimensions``, and an exception is raised if it matches more than one.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; if no element name is given, the single
      temporal element in ``dimensions`` is used.
    """
    element: DimensionElement

    if name in ("timespan.begin", "timespan.end"):
        # Unqualified timespan bound: exactly one temporal element must exist.
        matches = []
        for element_name in dimensions.elements:
            candidate = dimensions.universe[element_name]
            if candidate.temporal:
                matches.append(candidate)
        if not matches:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(matches) > 1:
            raise ValueError(
                "Timespan exists in more than one dimension element "
                f"({', '.join(element.name for element in matches)}); "
                "qualify timespan with specific dimension name."
            )
        return matches[0], name

    if "." not in name:
        # No dot: either an element name or a field of one of the elements.
        if name in dimensions.elements:
            return dimensions.universe[name], None

        # Collect every element owning metadata with this name, then every
        # dimension having it among its unique keys.  The flag records which
        # kind of match it was.
        match_pairs: list[tuple[DimensionElement, bool]] = []
        for element_name in dimensions.elements:
            candidate = dimensions.universe[element_name]
            if name in candidate.metadata.names:
                match_pairs.append((candidate, False))
        for dimension_name in dimensions.names:
            dimension = dimensions.universe.dimensions[dimension_name]
            if name in dimension.uniqueKeys.names:
                match_pairs.append((dimension, True))

        if not match_pairs:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(match_pairs) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element "
                f"({', '.join(element.name for element, _ in match_pairs)}); "
                "qualify field name with dimension name."
            )

        element, is_dimension_key = match_pairs[0]
        if is_dimension_key and name == cast(Dimension, element).primaryKey.name:
            # A reference to the primary key field is treated as a reference
            # to the dimension itself.
            return element, None
        return element, name

    # Qualified name: "<element>.<field>", split at the first dot.
    elem_name, _, field_name = name.partition(".")
    if elem_name not in dimensions.elements:
        if field_name in ("begin", "end"):
            raise ValueError(
                f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?"
            )
        raise ValueError(f"Unknown dimension element {elem_name!r}.")

    element = dimensions.universe[elem_name]

    if field_name in ("timespan.begin", "timespan.end"):
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return element, field_name

    if isinstance(element, Dimension) and field_name == element.primaryKey.name:
        # Spelling out the primary key is optional.
        return element, None

    field_exists = field_name in element.metadata.names or (
        isinstance(element, Dimension) and field_name in element.alternateKeys.names
    )
    if not field_exists:
        raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")
    return element, field_name

252 

253 

def categorizeElementOrderByName(element: DimensionElement, name: str) -> str | None:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element the identifier must refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        Name of a field of ``element``, or `None` if the identifier is the
        name of ``element`` itself or refers to its primary key (only
        possible when ``element`` is a `Dimension`).

    Raises
    ------
    ValueError
        Raised if the name is not recognized: a timespan bound is used with
        a non-temporal element, the element prefix does not match
        ``element``, the bare element name is used for a non-`Dimension`
        element, or the field does not exist.

    Notes
    -----
    ORDER BY identifiers follow slightly different rules from those in
    `categorizeElementId`:

    - The name can be the element's own name, e.g. ``visit``, provided the
      element is a `Dimension`.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``; the element name must
      match the ``element`` argument.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    if name in ("timespan.begin", "timespan.end"):
        if not element.temporal:
            # Report the identifier the caller passed; no field name has
            # been extracted at this point.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
        return name

    if "." not in name:
        # No dot: either the element's own name or one of its fields.
        if name == element.name:
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
            return None
        # A metadata name or one of the keys; the primary key is treated the
        # same as a reference to the dimension name itself.
        if isinstance(element, Dimension):
            if name == element.primaryKey.name:
                return None
            if name in element.uniqueKeys.names:
                return name
        if name in element.metadata.names:
            return name
        raise ValueError(f"Field '{name}' does not exist in '{element}'.")

    # Qualified name: "<element>.<field>", split at the first dot.
    elem_name, _, field_name = name.partition(".")
    if elem_name != element.name:
        if field_name in ("begin", "end"):
            extra = f"; perhaps you meant 'timespan.{field_name}'?"
        else:
            extra = "."
        raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")

    if field_name in ("timespan.begin", "timespan.end"):
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return field_name

    if isinstance(element, Dimension) and field_name == element.primaryKey.name:
        # Spelling out the primary key is optional.
        return None

    field_exists = field_name in element.metadata.names or (
        isinstance(element, Dimension) and field_name in element.alternateKeys.names
    )
    if not field_exists:
        raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")
    return field_name