Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25from typing import Any, List, Optional, Tuple, TYPE_CHECKING, Union 

26 

27import sqlalchemy 

28 

29from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet 

30from .exprParser import Node, TreeVisitor 

31from ._structs import QueryColumns 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true

34 import astropy.time 

35 

36 

def categorizeIdentifier(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.  Either a bare element name (``"visit"``)
        or a dotted ``"<element>.<column>"`` pair.  Only the first ``.`` is
        treated as a separator; an identifier with nothing after the dot is
        handled like a bare name.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.  For a bare name the looked-up element is returned with `None`
        without checking that it is a `Dimension`; callers that need a
        `Dimension` must verify that themselves.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: it stands for the element's primary key.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else must be one of the regular (non-key) record fields.
    if column not in element.RecordClass.fields.standard.names:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column

101 

102 

class InspectionVisitor(TreeVisitor[None]):
    """Implements TreeVisitor to identify dimension elements that need
    to be included in a query, prior to actually constructing a SQLAlchemy
    WHERE clause from it.

    Only identifiers contribute information; every other node type is
    accepted and ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Dimensions whose primary key is referenced by the expression.
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        # Non-primary-key columns referenced by the expression, grouped by
        # the element that owns them.  Columns are appended once per
        # occurrence, so a column used twice is listed twice.
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        pass

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        pass

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        pass

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # A primary-key reference; only a Dimension can be used this way.
            assert isinstance(element, Dimension)
            self.keys.add(element)
        else:
            self.metadata.setdefault(element, []).append(column)

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        pass

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        pass

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        pass

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        pass

    def visitTupleNode(self, items: Tuple[Any, ...], node: Node) -> None:
        # Docstring inherited from base class
        pass

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        pass

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class
        pass

167 

168 

class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so
        # that integer-looking text stays an integer.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return sqlalchemy.sql.literal(value)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Primary-key reference: resolved through the query's key
            # columns rather than a specific element table.
            assert isinstance(element, Dimension)
            return self.columns.getKeyColumn(element)
        return self.elements[element].columns[column]

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")
        return func(operand)

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")
        return func(lhs, rhs)

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals = [item for item in values if not isinstance(item, tuple)]
        ranges = [item for item in values if isinstance(item, tuple)]

        clauses = []
        for start, stop, stride in ranges:
            # stop is inclusive; let range() work out the exact number of
            # values instead of approximating it with integer division.
            expanded = range(start, stop + 1, stride)
            if len(literals) + len(expanded) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list
                literals += [sqlalchemy.sql.literal(value) for value in expanded]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        if clauses:
            expr = sqlalchemy.sql.or_(*clauses)
        else:
            # No literals and only empty ranges: nothing can match.  An
            # argument-less or_() would produce an empty expression that
            # does not constrain the query at all.
            expr = sqlalchemy.sql.false()
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitTupleNode(self, items: Tuple[sqlalchemy.sql.ColumnElement, ...], node: Node
                       ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from base class
        return sqlalchemy.sql.expression.Tuple(*items)

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        # NOTE: despite the annotation this is a plain tuple, not a
        # ColumnElement; visitIsIn relies on that to recognize ranges.
        return (start, stop, stride or 1)

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")