Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25 

26from sqlalchemy.sql import not_, or_, and_, literal, FromClause 

27 

28from ...core.utils import NamedValueSet, NamedKeyDict 

29from ...core import DimensionUniverse, Dimension, DimensionElement 

30from .exprParser import TreeVisitor 

31from ._structs import QueryColumns 

32 

33 

def categorizeIdentifier(universe: DimensionUniverse, name: str):
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize, either ``"<dimension>"`` or
        ``"<element>.<column>"``.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated up
        to a context where the error can be interpreted by a human.

    Notes
    -----
    Only the first ``.`` splits the identifier.  An identifier with nothing
    after the dot (e.g. ``"visit."``) has an empty column part and is handled
    exactly like the bare name.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: it must name a dimension, and it stands for that
        # dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    # Qualified identifier: "<element>.<column>".
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' in expression; please use "
            f"'{column}' or '{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else has to be a regular (non-key) field of the element's
    # record class.
    if column not in element.RecordClass.__slots__:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column

97 

98 

class InspectionVisitor(TreeVisitor):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before the SQLAlchemy
    WHERE clause itself is constructed.

    Every identifier is resolved with `categorizeIdentifier`.  Elements
    referenced through their primary key are collected in ``keys``; any other
    referenced column is appended to the list stored under its element in
    ``metadata``.  All other node types are ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Elements whose primary key appears in the expression.
        self.keys = NamedValueSet()
        # Element -> list of non-key column names appearing in the expression.
        self.metadata = NamedKeyDict()

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        target, field = categorizeIdentifier(self.universe, name)
        if field is None:
            # Primary-key reference: only the element itself is of interest.
            self.keys.add(target)
            return None
        self.metadata.setdefault(target, []).append(field)
        return None

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

154 

155 

class ClauseVisitor(TreeVisitor):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Each ``visit*`` method returns the SQLAlchemy expression for its node,
    except `visitRangeLiteral`, which returns a plain ``(start, stop,
    stride)`` tuple that is interpreted by `visitIsIn`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: or_(x, y),
                 "AND": lambda x, y: and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into int when possible, float otherwise.
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        return literal(value)

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return literal(value)

    def visitTimeLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return literal(value)

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Primary-key reference: use the query's key column.
            return self.columns.getKeyColumn(element)
        return self.elements[element].columns[column]

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")
        return func(operand)

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")
        return func(lhs, rhs)

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # stop is inclusive; a range object gives the exact number of
            # values without materializing them.
            expanded = range(start, stop + 1, stride)
            if len(literals) + len(expanded) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # small enough: add all values to the literal list
                literals += [literal(value) for value in expanded]

        if literals or not clauses:
            # add IN() in front of BETWEENs.  When there is nothing at all to
            # match (e.g. only an empty range was given) we still emit an
            # empty IN(), which SQLAlchemy documents as evaluating to false,
            # rather than calling or_() with no arguments.
            clauses.insert(0, lhs.in_(literals))

        expr = or_(*clauses)
        if not_in:
            expr = not_(expr)

        return expr

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        return (start, stop, stride or 1)