Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25 

26from sqlalchemy.sql import not_, or_, and_, literal, FromClause 

27 

28from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet 

29from .exprParser import TreeVisitor 

30from ._structs import QueryColumns 

31 

32 

def categorizeIdentifier(universe: DimensionUniverse, name: str):
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize; either a bare name (``"visit"``) or a
        dotted ``"<element>.<column>"`` pair.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier (an identifier ending in a dot with nothing after
        # it also lands here): it must name a dimension and stands for that
        # dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    # Dotted identifier: the part before the dot may be any dimension
    # element, not only a dimension.
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' in expression; please use "
            f"'{column}' or '{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else has to be an attribute of the element's record class.
    if column not in element.RecordClass.__slots__:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column

96 

97 

class InspectionVisitor(TreeVisitor):
    """`TreeVisitor` implementation that records which dimension elements an
    expression refers to, so that they can be included in a query before a
    SQLAlchemy WHERE clause is actually constructed from that expression.

    Only identifiers contribute anything; every other node type is a no-op.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Elements referenced through their primary key.
        self.keys = NamedValueSet()
        # Element -> list of non-primary-key column names referenced on it
        # (one entry is appended per reference).
        self.metadata = NamedKeyDict()

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Nothing is recorded for literals.
        return None

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Reference to the element's primary key.
            self.keys.add(element)
            return
        referenced = self.metadata.setdefault(element, [])
        referenced.append(column)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        # Nothing is recorded for operators or the other composite nodes
        # below.
        return None

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

153 

154 

class ClauseVisitor(TreeVisitor):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Each ``visit*`` method returns the SQLAlchemy expression for its node,
    except `visitRangeLiteral`, which returns a plain ``(start, stop,
    stride)`` tuple that `visitIsIn` later turns into SQL.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: or_(x, y),
                 "AND": lambda x, y: and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; try int first so that
        # integral literals stay integers.
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        return literal(value)

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return literal(value)

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return literal(value)

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Regular (non-key) column: look it up in the element's table.
            return self.elements[element].columns[column]
        else:
            # Primary key reference: let the query's column struct pick
            # the column to use.
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Number of values in start, start+stride, ... up to and
            # including stop (zero or negative when stop < start, in which
            # case nothing is added below).
            count = (stop - start) // stride + 1
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals.extend(literal(value) for value in range(start, stop + 1, stride))

        if literals or not clauses:
            # add IN() in front of BETWEENs.  When there is nothing at all to
            # match (e.g. only an empty range was given) still emit an empty
            # IN(), which matches no rows, rather than calling or_() with no
            # arguments, which would not constrain the query.
            clauses.insert(0, lhs.in_(literals))

        expr = or_(*clauses)
        if not_in:
            expr = not_(expr)

        return expr

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        return (start, stop, stride or 1)