Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = () # all symbols intentionally private; for internal package use. 

24 

25from typing import Any, List, Mapping, Optional, Tuple, TYPE_CHECKING, Union 

26 

27import sqlalchemy 

28from sqlalchemy.ext.compiler import compiles 

29 

30from ...core import DimensionUniverse, Dimension, DimensionElement, NamedKeyDict, NamedValueSet 

31from ...core.ddl import AstropyTimeNsecTai 

32from .exprParser import Node, TreeVisitor 

33from ._structs import QueryColumns 

34 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true

36 import astropy.time 

37 

38 

class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Wrapper that marks a TIMESTAMP column appearing in a user expression.

    Time literals in expressions are `astropy.time.Time` instances that are
    converted to integer nanoseconds since Epoch, so a TIMESTAMP column has
    to be brought to that same representation before the two can be
    compared.  This class only stores the wrapped column; the conversion is
    produced by the dialect-specific compilation functions registered for
    this type.

    The wrapper is used only for expressions in a WHERE clause.  Values of
    TIMESTAMP columns returned from queries still go through the standard
    mechanism and are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The TIMESTAMP column to wrap.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        # Kept private; read back by the dialect-specific compilers.
        self._column = column

56 

57 

@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the SQLite dialect.

    SQLite provides a ``strftime`` function which, with the ``%s`` format,
    converts a timestamp value to Unix seconds; the result is then multiplied
    by 1000000000 to express it in nanoseconds.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Wrapper whose ``_column`` attribute holds the TIMESTAMP column.
    compiler
        SQL compiler passed in by SQLAlchemy; not used here.
    **kw
        Additional compilation arguments; not used here.

    Returns
    -------
    sql : `str`
        SQL fragment for the converted column.
    """
    # NOTE(review): only the column's ``name`` is interpolated and the
    # compiler is bypassed -- confirm this is adequate when the name would
    # need quoting or a table qualifier.
    column_name = element._column.name
    return f"STRFTIME('%s', {column_name})*1000000000"

66 

67 

@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a wrapped TIMESTAMP column for the PostgreSQL dialect.

    PostgreSQL offers ``EXTRACT(epoch FROM timestamp)``; its result is
    multiplied by 1000000000 so that it matches the nanosecond scale used for
    time literals.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Wrapper whose ``_column`` attribute holds the TIMESTAMP column.
    compiler
        SQL compiler passed in by SQLAlchemy; not used here.
    **kw
        Additional compilation arguments; not used here.

    Returns
    -------
    sql : `str`
        SQL fragment for the converted column.
    """
    # NOTE(review): only the column's ``name`` is interpolated and the
    # compiler is bypassed -- confirm this is adequate when the name would
    # need quoting or a table qualifier.
    column_name = element._column.name
    return f"EXTRACT(epoch FROM {column_name})*1000000000"

75 

76 

def categorizeIngestDateId(name: str) -> bool:
    """Tell whether an identifier from a parsed expression refers to the
    ``ingest_date`` attribute of a dataset table.

    Parameters
    ----------
    name : `str`
        Identifier to categorize.

    Returns
    -------
    isIngestDate : `bool`
        `True` if the identifier is exactly ``ingest_date``, `False`
        otherwise.
    """
    # TODO: the attribute name is hardcoded for now; it may be better to
    # extract it from the schema, but it is not yet clear how to do that.
    ingest_date_name = "ingest_date"
    return name == ingest_date_name

94 

95 

def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize; either ``<element>`` or
        ``<element>.<column>``.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    # Only the first '.' separates the element name from the column name.
    table, _, column = name.partition('.')

    if not column:
        # Bare identifier: whatever the universe holds under that name is
        # returned, with no column.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    if column not in element.RecordClass.fields.standard.names:
        raise LookupError(f"Column '{column}' not found in table for {element}.")
    return element, column

160 

161 

class InspectionVisitor(TreeVisitor[None]):
    """Tree visitor that collects what an expression refers to, so that the
    needed dimension elements can be included in a query before the actual
    SQLAlchemy WHERE clause is constructed from the expression.

    Only identifiers carry information for this purpose; every other node
    type is visited without any effect.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.

    Notes
    -----
    After the visit the following attributes hold the result:

    - ``keys``: dimensions referenced through their primary key;
    - ``metadata``: for each dimension element, the list of its
      non-primary-key columns that were referenced (one entry per
      reference, duplicates are not removed);
    - ``hasIngestDate``: `True` if ``ingest_date`` was referenced.
    """

    def __init__(self, universe: DimensionUniverse):
        self.universe = universe
        self.keys: NamedValueSet[Dimension] = NamedValueSet()
        self.metadata: NamedKeyDict[DimensionElement, List[str]] = NamedKeyDict()
        self.hasIngestDate: bool = False

    def visitNumericLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        return None

    def visitStringLiteral(self, value: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return None

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return None

    def visitIdentifier(self, name: str, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIdentifier

        # ingest_date is not a dimension; just remember that it was used.
        if categorizeIngestDateId(name):
            self.hasIngestDate = True
            return

        element, column = categorizeElementId(self.universe, name)
        if column is None:
            # No column means the identifier stands for a primary key.
            assert isinstance(element, Dimension)
            self.keys.add(element)
            return

        self.metadata.setdefault(element, []).append(column)

    def visitUnaryOp(self, operator: str, operand: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        return None

    def visitBinaryOp(self, operator: str, lhs: Any, rhs: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        return None

    def visitIsIn(self, lhs: Any, values: List[Any], not_in: bool, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitIsIn
        return None

    def visitParens(self, expression: Any, node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitParens
        return None

    def visitTupleNode(self, items: Tuple[Any, ...], node: Node) -> None:
        # Docstring inherited from base class
        return None

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node) -> None:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return None

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class
        return None

230 

231 

class ClauseVisitor(TreeVisitor[sqlalchemy.sql.ColumnElement]):
    """Implements TreeVisitor to convert the tree into a SQLAlchemy WHERE
    clause.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyDict`
        `DimensionElement` instances and their associated tables.

    Notes
    -----
    ``hasIngestDate`` is set to `True` once an ``ingest_date`` identifier has
    been visited.
    """

    unaryOps = {"NOT": lambda x: sqlalchemy.sql.not_(x),
                "+": lambda x: +x,
                "-": lambda x: -x}
    """Mapping of unary operator names to corresponding functions"""

    binaryOps = {"OR": lambda x, y: sqlalchemy.sql.or_(x, y),
                 "AND": lambda x, y: sqlalchemy.sql.and_(x, y),
                 "=": lambda x, y: x == y,
                 "!=": lambda x, y: x != y,
                 "<": lambda x, y: x < y,
                 "<=": lambda x, y: x <= y,
                 ">": lambda x, y: x > y,
                 ">=": lambda x, y: x >= y,
                 "+": lambda x, y: x + y,
                 "-": lambda x, y: x - y,
                 "*": lambda x, y: x * y,
                 "/": lambda x, y: x / y,
                 "%": lambda x, y: x % y}
    """Mapping of binary operator names to corresponding functions"""

    def __init__(self, universe: DimensionUniverse,
                 columns: QueryColumns, elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause]):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.hasIngestDate: bool = False

    def visitNumericLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into int if possible, float otherwise.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return sqlalchemy.sql.literal(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return sqlalchemy.sql.literal(value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return sqlalchemy.sql.literal(value, type_=AstropyTimeNsecTai)

    def visitIdentifier(self, name: str, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if categorizeIngestDateId(name):
            self.hasIngestDate = True
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            # Wrap the column so that it is compiled into the same integer
            # representation that time literals use.
            return _TimestampColumnElement(self.columns.datasets.ingestDate)
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            return self.elements[element].columns[column]
        assert isinstance(element, Dimension)
        return self.columns.getKeyColumn(element)

    def visitUnaryOp(self, operator: str, operand: sqlalchemy.sql.ColumnElement, node: Node
                     ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        func = self.unaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected unary operator `{operator}' in `{node}'.")
        return func(operand)

    def visitBinaryOp(self, operator: str, lhs: sqlalchemy.sql.ColumnElement,
                      rhs: sqlalchemy.sql.ColumnElement, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        func = self.binaryOps.get(operator)
        if func is None:
            raise ValueError(f"Unexpected binary operator `{operator}' in `{node}'.")
        return func(lhs, rhs)

    def visitIsIn(self, lhs: sqlalchemy.sql.ColumnElement, values: List[sqlalchemy.sql.ColumnElement],
                  not_in: bool, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitIsIn

        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))

        max_in_items = 1000

        # split the list into literals and ranges
        literals, ranges = [], []
        for item in values:
            if isinstance(item, tuple):
                ranges.append(item)
            else:
                literals.append(item)

        clauses = []
        for start, stop, stride in ranges:
            # Number of values in the range; stop is inclusive, so e.g.
            # 0..10:5 has three members (0, 5, 10).  Computed arithmetically
            # so that arbitrarily large bounds cannot overflow.  Stride is
            # expected to be positive (visitRangeLiteral substitutes 1 for a
            # missing stride).
            count = max(0, (stop - start) // stride + 1)
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(value) for value in range(start, stop+1, stride)]

        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.in_(literals))

        if clauses:
            expr = sqlalchemy.sql.or_(*clauses)
        else:
            # Nothing to match against (e.g. the only item was an empty
            # range); membership in an empty set is false.  Avoids calling
            # or_() without arguments.
            expr = sqlalchemy.sql.expression.false()
        if not_in:
            expr = sqlalchemy.sql.not_(expr)

        return expr

    def visitParens(self, expression: sqlalchemy.sql.ColumnElement, node: Node
                    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitParens
        return expression.self_group()

    def visitTupleNode(self, items: Tuple[sqlalchemy.sql.ColumnElement, ...], node: Node
                       ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from base class
        return sqlalchemy.sql.expression.Tuple(*items)

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from TreeVisitor.visitRangeLiteral

        # Just return a triple and let parent clauses handle it,
        # stride can be None which means the same as 1.
        # Note that, despite the annotation, the value returned is a plain
        # tuple; visitIsIn recognizes ranges by that type.
        return (start, stop, stride or 1)

    def visitPointNode(self, ra: Any, dec: Any, node: Node) -> None:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")