Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 7%

102 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25import enum 

26from typing import cast 

27 

28from ....core import Dimension, DimensionElement, DimensionGraph, DimensionUniverse 

29 

30 

class ExpressionConstant(enum.Enum):
    """Global constants that may appear as identifiers in any expression.

    The value of each member is the lower-case spelling of the identifier
    it stands for.
    """

    # Identifier spelled ``null``.
    NULL = "null"

    # Identifier spelled ``ingest_date``.
    INGEST_DATE = "ingest_date"

36 

37 

def categorizeConstant(name: str) -> ExpressionConstant | None:
    """Map an identifier from a parsed expression onto a global constant.

    Parameters
    ----------
    name : `str`
        Identifier to look up.  It is lower-cased before comparison, so the
        match is case-insensitive.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The enumeration member whose value equals the lower-cased
        identifier, or `None` when no member matches.
    """
    lowered = name.lower()
    # Scan the members for a matching value; `None` signals "not a constant".
    return next((constant for constant in ExpressionConstant if constant.value == lowered), None)

57 

58 

def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare name looked up in ``universe`` (meaning
    the primary key of that dimension) or ``<element>.<column>``, naming a
    column of a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.  It is split at the first ``.``.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if the
        identifier has no column part or the column part is the primary key
        of a `Dimension`.

    Raises
    ------
    LookupError
        Raised if the part before the dot (or the whole identifier when
        there is no dot) is not a key of ``universe``.
    RuntimeError
        Raised if the column part is itself one of the names in
        ``element.graph.names`` (e.g. ``patch.tract``).  The message
        suggests how to rewrite the expression, so it should generally be
        propagated to a context where a human can read it.
    """
    element_name, _, column_name = name.partition(".")

    if not column_name:
        # Bare identifier: it must be a key of the universe by itself.
        try:
            found = universe[element_name]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{element_name}'.") from err
        return found, None

    try:
        element = universe[element_name]
    except KeyError:
        if element_name in ("timespan", "datetime", "timestamp"):
            # These prefixes are not element names; the user most likely
            # forgot to say which element's timespan is meant.
            raise LookupError(
                "Dimension element name cannot be inferred in this context; "
                f"use <dimension>.timespan.{column_name} instead."
            ) from None
        raise LookupError(f"No dimension element with name {element_name!r} in {name!r}.") from None

    if isinstance(element, Dimension) and column_name == element.primaryKey.name:
        # "visit.id = x" is accepted as a clearer spelling of "visit = x".
        return element, None

    if column_name not in element.graph.names:
        # Ordinary column of the element's table.
        return element, column_name

    # Something like "patch.tract = x" or "tract.tract = x": confusing at
    # best and possibly not a real column.  We can guess the intent, so tell
    # the user which spellings to use instead.
    key_name = universe[column_name].primaryKey.name  # type: ignore
    raise RuntimeError(
        f"Invalid reference to '{element_name}.{column_name}' "
        f"in expression; please use '{column_name}' or "
        f"'{column_name}.{key_name}' instead."
    )

127 

128 

def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an ORDER BY identifier against the elements of a graph.

    Parameters
    ----------
    graph : `DimensionGraph`
        Dimensions whose elements are searched.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        the identifier is an element name or the primary key of a
        `Dimension`.

    Raises
    ------
    ValueError
        Raised if the element name is not found in the graph, if the field
        name is not recognized, or if an unqualified field name or timespan
        matches more than one element.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those in
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``; it is then searched for in all elements of
      the graph and must match exactly one of them.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; without an element prefix the graph must
      contain exactly one temporal element.
    """
    timespan_fields = ("timespan.begin", "timespan.end")

    # Case 1: bare "timespan.begin" / "timespan.end".
    if name in timespan_fields:
        temporal = [candidate for candidate in graph.elements if candidate.temporal]
        if not temporal:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(temporal) > 1:
            raise ValueError(
                "Timespan exists in more than one dimension element "
                f"({', '.join(candidate.name for candidate in temporal)}); "
                "qualify timespan with specific dimension name."
            )
        return temporal[0], name

    # Case 2: no dot, so either an element name or a field of some element.
    if "." not in name:
        if name in graph.elements.names:
            return graph.elements[name], None

        # Metadata matches come first, followed by unique-key matches; the
        # flag records whether the match came from a dimension key.
        candidates: list[tuple[DimensionElement, bool]] = [
            (elem, False) for elem in graph.elements if name in elem.metadata.names
        ]
        candidates.extend((dim, True) for dim in graph if name in dim.uniqueKeys.names)

        if not candidates:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(candidates) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element "
                f"({', '.join(found.name for found, _ in candidates)}); "
                "qualify field name with dimension name."
            )

        found, via_key = candidates[0]
        if via_key and name == cast(Dimension, found).primaryKey.name:
            # A primary key field is reported the same way as the dimension
            # name itself.
            return found, None
        return found, name

    # Case 3: "<element>.<field>", split at the first dot only.
    owner, _, field = name.partition(".")
    if owner not in graph.elements.names:
        if field in ("begin", "end"):
            raise ValueError(f"Unknown dimension element {owner!r}; perhaps you meant 'timespan.{field}'?")
        raise ValueError(f"Unknown dimension element {owner!r}.")
    element = graph.elements[owner]

    if field in timespan_fields:
        if not element.temporal:
            raise ValueError(f"Cannot use '{field}' with non-temporal element '{element}'.")
        return element, field

    if isinstance(element, Dimension) and field == element.primaryKey.name:
        # Spelling out the primary key is optional.
        return element, None

    if field in element.metadata.names or (
        isinstance(element, Dimension) and field in element.alternateKeys.names
    ):
        return element, field

    raise ValueError(f"Field '{field}' does not exist in '{element}'.")

236 

237 

def categorizeElementOrderByName(element: DimensionElement, name: str) -> str | None:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element the identifier must refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the identifier is its own name or
        its primary key.

    Raises
    ------
    ValueError
        Raised if the name is not recognized: a timespan identifier used
        with a non-temporal element, an element prefix that differs from
        ``element.name``, a bare element name for a non-`Dimension`
        element, or a field that does not exist in ``element``.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those in
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``; the element name
      must correspond to the ``element`` argument.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    timespan_fields = ("timespan.begin", "timespan.end")

    # Case 1: bare "timespan.begin" / "timespan.end".
    if name in timespan_fields:
        if not element.temporal:
            # Report the identifier the caller passed.  No field name has
            # been derived at this point, so interpolating it would print
            # 'None' in the message.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
        return name

    # Case 2: no dot, so either the element's own name or one of its fields.
    if "." not in name:
        if name == element.name:
            # A bare element name is only accepted for dimensions.
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
            return None
        # Can be a metadata name or any of the keys, but the primary key is
        # treated the same as a reference to the dimension name itself.
        if isinstance(element, Dimension):
            if name == element.primaryKey.name:
                return None
            if name in element.uniqueKeys.names:
                return name
        if name in element.metadata.names:
            return name
        raise ValueError(f"Field '{name}' does not exist in '{element}'.")

    # Case 3: "<element>.<field>", split at the first dot only so that
    # "<element>.timespan.begin" yields the field "timespan.begin".
    elem_name, _, field_name = name.partition(".")
    if elem_name != element.name:
        if field_name in ("begin", "end"):
            extra = f"; perhaps you meant 'timespan.{field_name}'?"
        else:
            extra = "."
        raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")

    if field_name in timespan_fields:
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return field_name

    if isinstance(element, Dimension) and field_name == element.primaryKey.name:
        # Spelling out the primary key is optional.
        return None

    if field_name in element.metadata.names or (
        isinstance(element, Dimension) and field_name in element.alternateKeys.names
    ):
        return field_name

    raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")