Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 6%

105 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-05 10:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = () # all symbols intentionally private; for internal package use. 

30 

31import enum 

32from typing import cast 

33 

34from ....dimensions import Dimension, DimensionElement, DimensionGroup, DimensionUniverse 

35 

36 

class ExpressionConstant(enum.Enum):
    """Global constants that may appear as identifiers in any expression.

    The value of each member is the lower-case spelling of the identifier
    it stands for.
    """

    # The ``null`` identifier.
    NULL = "null"

    # The ``ingest_date`` identifier.
    INGEST_DATE = "ingest_date"

42 

43 

def categorizeConstant(name: str) -> ExpressionConstant | None:
    """Map an expression identifier onto one of the global constants.

    Parameters
    ----------
    name : `str`
        Identifier to look up.  The comparison ignores case because the
        identifier is lower-cased before the lookup.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The matching enumeration member, or `None` when the identifier is
        not the value of any `ExpressionConstant` member.
    """
    lowered = name.lower()
    try:
        constant = ExpressionConstant(lowered)
    except ValueError:
        # Enum lookup by value raises ValueError for unknown values; that
        # simply means the identifier is not a constant.
        constant = None
    return constant

63 

64 

def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare name (interpreted as a lookup key in
    ``universe``) or ``<element>.<column>``, split at the first dot.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions; indexed by element name.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a column in the table for ``element``, or `None` when the
        identifier refers to a dimension's primary key (either directly, by
        bare name, or via ``<element>.<primary key>`` /
        ``<element>.<dimension>``).

    Raises
    ------
    LookupError
        Raised if the part of the identifier before the dot (or the whole
        identifier when there is no dot) is not a key of ``universe``.
    """
    table, _, column = name.partition(".")

    # Unqualified identifier: it has to name something in the universe.
    if not column:
        try:
            return universe[table], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err

    # Qualified identifier: resolve the element first.
    try:
        element = universe[table]
    except KeyError:
        if table in ("timespan", "datetime", "timestamp"):
            # These prefixes look like a column reference that is missing
            # its element; give the user a hint on how to qualify it.
            raise LookupError(
                "Dimension element name cannot be inferred in this context; "
                f"use <dimension>.timespan.{column} instead."
            ) from None
        raise LookupError(f"No dimension element with name {table!r} in {name!r}.") from None

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # "visit.id = x" is accepted as a more explicit spelling of
        # "visit = x".
        return element, None
    if column in element.dimensions.names:
        # "patch.tract = x" or "tract.tract = x" really means "tract = x";
        # hand back the referenced dimension itself.
        return element.dimensions[column], None
    return element, column

125 

126 

def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an ORDER BY identifier against a group of dimensions.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions whose elements the identifier may refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a column in the table for ``element``, or `None` when the
        identifier refers to a dimension's primary key.

    Raises
    ------
    ValueError
        Raised if the element name is not one of ``dimensions.elements``,
        if the field name is not recognized, or if an unqualified field
        name or timespan matches more than one element.

    Notes
    -----
    The rules differ slightly from those in `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata (or key) name
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata (or key) name without an element prefix,
      e.g. ``day_obs``; it is then searched for in all elements of
      ``dimensions`` and must match exactly one of them.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; without an element prefix the single
      temporal element of ``dimensions`` is used.
    """
    timespan_bounds = ("timespan.begin", "timespan.end")

    # Case 1: bare timespan bound; exactly one temporal element must exist.
    if name in timespan_bounds:
        temporal = []
        for candidate_name in dimensions.elements:
            candidate = dimensions.universe[candidate_name]
            if candidate.temporal:
                temporal.append(candidate)
        if not temporal:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(temporal) > 1:
            raise ValueError(
                "Timespan exists in more than one dimension element "
                f"({', '.join(element.name for element in temporal)}); "
                "qualify timespan with specific dimension name."
            )
        return temporal[0], name

    elem_name, dot, field_name = name.partition(".")

    # Case 2: no dot; either an element name or a field of some element.
    if not dot:
        if name in dimensions.elements:
            return dimensions.universe[name], None

        # Collect every element carrying this name as metadata, then every
        # dimension carrying it as a unique key (flagged with True).
        candidates = []
        for candidate_name in dimensions.elements:
            candidate = dimensions.universe[candidate_name]
            if name in candidate.metadata.names:
                candidates.append((candidate, False))
        for dimension_name in dimensions.names:
            dimension = dimensions.universe.dimensions[dimension_name]
            if name in dimension.uniqueKeys.names:
                candidates.append((dimension, True))

        if not candidates:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(candidates) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element "
                f"({', '.join(element.name for element, _ in candidates)}); "
                "qualify field name with dimension name."
            )
        found, is_dimension_key = candidates[0]
        if is_dimension_key and name == cast(Dimension, found).primaryKey.name:
            # A primary key field is the same thing as the dimension itself.
            return found, None
        return found, name

    # Case 3: qualified name; the prefix must be one of our elements.
    if elem_name not in dimensions.elements:
        if field_name in ("begin", "end"):
            raise ValueError(
                f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?"
            )
        raise ValueError(f"Unknown dimension element {elem_name!r}.")

    element = dimensions.universe[elem_name]
    if field_name in timespan_bounds:
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return element, field_name
    if isinstance(element, Dimension) and field_name == element.primaryKey.name:
        # Spelling out the primary key is optional.
        return element, None
    if field_name in element.dimensions.names:
        # Something like visit.physical_filter, which we want to remap to
        # just "physical_filter".
        return dimensions.universe[field_name], None

    is_known_field = field_name in element.metadata.names or (
        isinstance(element, Dimension) and field_name in element.alternateKeys.names
    )
    if not is_known_field:
        raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")
    return element, field_name

248 

249 

def categorizeElementOrderByName(element: DimensionElement, name: str) -> tuple[DimensionElement, str | None]:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element the identifier is resolved against.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    ValueError
        Raised if the name is not recognized, if a timespan bound is
        requested for a non-temporal element, or if a qualified name uses
        an element prefix other than ``element.name``.

    Notes
    -----
    For ORDER BY identifiers we use a slightly different set of rules
    compared to the rules in `categorizeElementId`:

    - Name can be a dimension element name, e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name) separated
      by a dot, e.g. ``detector.full_name``; the element name must correspond
      to the ``element`` argument.
    - Name can be a metadata name without element name prefix, e.g.
      ``day_obs``.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    field_name: str | None = None
    if name in ("timespan.begin", "timespan.end"):
        if not element.temporal:
            # Report ``name`` here: ``field_name`` has not been assigned yet
            # in this branch, so interpolating it would print 'None'.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
        field_name = name
    elif "." not in name:
        # No dot, can be the element's own name, one of the dimensions it
        # references, or one of its fields.
        if name == element.name:
            # Referring to the element by name only makes sense when it is a
            # dimension (the result then stands for its primary key).
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
        elif name in element.dimensions.names:
            # Name of another dimension listed in ``element.dimensions``;
            # return that dimension itself.
            return element.universe[name], None
        else:
            # Can be a metadata name or any of the keys, but primary key needs
            # to be treated the same as a reference to the dimension name
            # itself.
            if isinstance(element, Dimension):
                if name == element.primaryKey.name:
                    return element, None
                elif name in element.uniqueKeys.names:
                    return element, name
            if name in element.metadata.names:
                return element, name
            raise ValueError(f"Field '{name}' does not exist in '{element}'.")
    else:
        # Qualified name, must be this element's name and a field.
        elem_name, _, field_name = name.partition(".")
        if elem_name != element.name:
            if field_name == "begin" or field_name == "end":
                extra = f"; perhaps you meant 'timespan.{field_name}'?"
            else:
                extra = "."
            raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return element, field_name