Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25 

26from sqlalchemy.sql import not_, or_, and_, literal, FromClause 

27 

28from ...core.utils import NamedValueSet, NamedKeyDict 

29from ...core import DimensionUniverse, Dimension, DimensionElement 

30from .exprParser import TreeVisitor 

31from ._structs import QueryColumns 

32 

33 

def categorizeIdentifier(universe: DimensionUniverse, name: str):
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.  Either a bare name (``"visit"``) or a
        dotted ``"<element>.<column>"`` pair; only the first ``.`` is treated
        as a separator.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated up
        to a context where the error can be interpreted by a human.
    """
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: it can only be the name of a dimension, and it
        # stands for that dimension's primary key.
        try:
            dimension = universe.dimensions[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    # Dotted identifier: "<element>.<column>".
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' in expression; please use "
            f"'{column}' or '{column}.{universe[column].primaryKey.name}' instead."
        )

    # Anything else must be a regular (non-key) field of the element's record.
    if column not in element.RecordClass.__slots__:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column

97 

98 

class InspectionVisitor(TreeVisitor):
    """Tree visitor that records which dimension elements an expression
    refers to, so they can be included in a query before the SQLAlchemy
    WHERE clause itself is constructed.

    Only identifiers contribute information; every other node type is
    accepted and ignored.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        # Elements that were referenced through their primary key.
        self.keys = NamedValueSet()
        # Element -> list of non-key column names that were referenced.
        self.metadata = NamedKeyDict()

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is None:
            # Primary-key reference: only the element itself is needed.
            self.keys.add(element)
            return
        # Non-key column: remember it under its element.
        self.metadata.setdefault(element, []).append(column)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

150 

151 

class ClauseVisitor(TreeVisitor):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Each ``visit*`` method returns the SQLAlchemy expression for its node,
    except `visitRangeLiteral`, which returns a plain
    ``(start, stop, stride)`` tuple that `visitIsIn` later expands.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyDict`
        `DimensionElement` instances and their associated tables.
    """

    unaryOps = {"NOT": lambda x: not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: or_(x, y),
                 "AND": lambda x, y: and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements

    def visitNumericLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; try int first so that
        # integer-looking text stays an integer.
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        return literal(value)

    def visitStringLiteral(self, value, node):
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return literal(value)

    def visitIdentifier(self, name, node):
        # Docstring inherited from TreeVisitor.visitIdentifier
        element, column = categorizeIdentifier(self.universe, name)
        if column is not None:
            # Regular column: look it up in the table for this element.
            return self.elements[element].columns[column]
        else:
            # Primary-key reference: the query knows which column holds it.
            return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator, operand, node):
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func:
            return func(operand)
        else:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")

    def visitBinaryOp(self, operator, lhs, rhs, node):
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func:
            return func(lhs, rhs)
        else:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")

    def visitIsIn(self, lhs, values, not_in, node):
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Number of values in the inclusive range start..stop taken every
            # `stride`; this matches len(range(start, stop + 1, stride)) for
            # non-empty ranges (e.g. 1..10 step 3 -> 1, 4, 7, 10 -> 4).
            count = (stop - start) // stride + 1
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [literal(value) for value in range(start, stop+1, stride)]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        # NOTE(review): if `values` is empty, or holds only empty ranges
        # (stop < start), `clauses` is empty here and or_() is called with no
        # arguments -- confirm the parser never produces that case.
        expr = or_(*clauses)
        if not_in:
            expr = not_(expr)

        return expr

    def visitParens(self, expression, node):
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitRangeLiteral(self, start, stop, stride, node):
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        return (start, stop, stride or 1)

293 # Just return a triple and let parent clauses handle it, 

294 # stride can be None which means the same as 1. 

295 return (start, stop, stride or 1)