Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py: 29%

323 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-22 02:05 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "convertExpressionToSql", 

26 "ExpressionTypeError", 

27) 

28 

29import operator 

30import warnings 

31from abc import ABC, abstractmethod 

32from datetime import datetime 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 Dict, 

38 Iterable, 

39 List, 

40 Mapping, 

41 Optional, 

42 Tuple, 

43 Type, 

44 TypeVar, 

45 Union, 

46) 

47 

48import astropy.utils.exceptions 

49import sqlalchemy 

50from astropy.time import Time 

51from lsst.utils.iteration import ensure_iterable 

52from sqlalchemy.ext.compiler import compiles 

53from sqlalchemy.sql.expression import func 

54from sqlalchemy.sql.visitors import InternalTraversal 

55 

56from ....core import ( 

57 Dimension, 

58 DimensionElement, 

59 DimensionUniverse, 

60 NamedKeyMapping, 

61 Timespan, 

62 TimespanDatabaseRepresentation, 

63 ddl, 

64) 

65from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId 

66from .parser import Node, TreeVisitor 

67 

68# As of astropy 4.2, the erfa interface is shipped independently and 

69# ErfaWarning is no longer an AstropyWarning 

70try: 

71 import erfa 

72except ImportError: 

73 erfa = None 

74 

75if TYPE_CHECKING: 75 ↛ 76line 75 didn't jump to line 76, because the condition on line 75 was never true

76 from .._structs import QueryColumns 

77 

78 

def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Convert a query expression tree into a SQLAlchemy expression object.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in
        the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    visitor = WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    # Visiting the tree yields a converter for the root node; ``finish``
    # turns it into the final column expression (the base-class ``finish``
    # raises `ExpressionTypeError`).
    converter = tree.visit(visitor)
    return converter.finish(tree)

121 

122 

class ExpressionTypeError(TypeError):
    """Exception raised when the types in a query expression are not
    compatible with the operators or other syntax.
    """

127 

128 

class _TimestampLiteral(sqlalchemy.sql.ColumnElement):
    """Special ColumnElement type used for TIMESTAMP literals in expressions.

    SQLite stores timestamps as strings which sometimes can cause issues when
    comparing strings. For more reliable comparison SQLite needs DATETIME()
    wrapper for those strings. For PostgreSQL it works better if we add
    TIMESTAMP to string literals.

    Parameters
    ----------
    literal : `datetime.datetime`
        The timestamp value to embed in the SQL expression.
    """

    # SQLAlchemy statement-caching hooks: ``_literal`` is declared as the
    # only piece of state this element carries.
    # NOTE(review): exact caching semantics are defined by SQLAlchemy;
    # confirm against its custom-construct documentation.
    inherit_cache = True
    _traverse_internals = [("_literal", InternalTraversal.dp_plain_obj)]

    def __init__(self, literal: datetime):
        super().__init__()
        # Read back by the dialect-specific ``@compiles`` functions.
        self._literal = literal

144 

145 

@compiles(_TimestampLiteral, "sqlite")
def compile_timestamp_literal_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compilation of TIMESTAMP literal for SQLite.

    The literal is wrapped in SQLite's ``datetime`` function, which
    normalizes a timestamp value to its canonical text form so that both
    sides of a comparison are formatted the same way.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled; its ``_literal`` attribute holds the value.
    compiler
        SQLAlchemy compiler used to render the wrapped expression.
    **kw
        Additional keyword arguments, forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL for ``datetime(<literal>)``.
    """
    return compiler.process(func.datetime(sqlalchemy.sql.literal(element._literal)), **kw)

154 

155 

@compiles(_TimestampLiteral, "postgresql")
def compile_timestamp_literal_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compilation of TIMESTAMP literal for PostgreSQL.

    For PostgreSQL it works better if we add TIMESTAMP to string literals.

    Parameters
    ----------
    element : `_TimestampLiteral`
        Element being compiled; its ``_literal`` attribute holds the value.
    compiler
        SQLAlchemy compiler used to render the string literal.
    **kw
        Additional keyword arguments, forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL of the form ``TIMESTAMP <string literal>``.
    """
    # Format with a space separator and fixed microsecond precision before
    # handing the string to the compiler.
    literal = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(literal), **kw)

164 

165 

class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Special ColumnElement type used for TIMESTAMP columns or in expressions.

    SQLite stores timestamps as strings which sometimes can cause issues when
    comparing strings. For more reliable comparison SQLite needs DATETIME()
    wrapper for columns.

    This mechanism is only used for expressions in WHERE clause, values of the
    TIMESTAMP columns returned from queries are still handled by standard
    mechanism and they are converted to `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        The timestamp column (or column expression) to wrap.
    """

    # SQLAlchemy statement-caching hooks: ``_column`` is declared as the
    # only piece of state this element carries.
    # NOTE(review): exact caching semantics are defined by SQLAlchemy;
    # confirm against its custom-construct documentation.
    inherit_cache = True
    _traverse_internals = [("_column", InternalTraversal.dp_clauseelement)]

    def __init__(self, column: sqlalchemy.sql.ColumnElement):
        super().__init__()
        # Read back by the dialect-specific ``@compiles`` functions.
        self._column = column

184 

185 

@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compilation of TIMESTAMP column for SQLite.

    The column is wrapped in SQLite's ``datetime`` function, which
    normalizes a timestamp value to its canonical text form so that both
    sides of a comparison are formatted the same way.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; its ``_column`` attribute holds the column.
    compiler
        SQLAlchemy compiler used to render the wrapped expression.
    **kw
        Additional keyword arguments, forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL for ``datetime(<column>)``.
    """
    return compiler.process(func.datetime(element._column), **kw)

194 

195 

@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compilation of TIMESTAMP column for PostgreSQL.

    The wrapped column is rendered unchanged; no extra function call is
    added for this dialect.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; its ``_column`` attribute holds the column.
    compiler
        SQLAlchemy compiler used to render the column.
    **kw
        Additional keyword arguments, forwarded to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Rendered SQL for the wrapped column.
    """
    return compiler.process(element._column, **kw)

200 

201 

class WhereClauseConverter(ABC):
    """Abstract base class for the objects used to transform a butler query
    expression tree into SQLAlchemy expression objects.

    WhereClauseConverter instances are created and consumed by
    `WhereClauseConverterVisitor`, which is in turn created and used only by
    the `convertExpressionToSql` function.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Finish converting this [boolean] expression, returning a SQLAlchemy
        expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.  The
            default implementation always raises this exception; subclasses
            that may actually represent a boolean should override.
        """
        raise ExpressionTypeError(f'Expression "{node}" has type {self.dtype}, not bool.')

    @property
    @abstractmethod
    def dtype(self) -> type:
        """The Python type of the expression tree node associated with this
        converter (`type`).

        This should be the exact type of any literal or bind object, and the
        type produced by SQLAlchemy's converter mechanism when returning rows
        from the database in the case of expressions that map to database
        entities or expressions.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Visit this expression when it appears as an element in the
        right-hand side of an IN expression.

        Implementations must either:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            List of SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            List of (start, stop, step) tuples that represent ranges that the
            left-hand side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand for the IN expression.
            Literals should only be appended to if ``self.dtype is dtype``,
            and ``ranges`` should only be appended to if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of an
            IN expression, or if it is incompatible with the left-hand side
            type.
        """
        raise NotImplementedError()

289 

290 

class ScalarWhereClauseConverter(WhereClauseConverter):
    """Primary implementation of WhereClauseConverter, for expressions that can
    always be represented directly by a single `sqlalchemy.sql.ColumnElement`
    instance.

    Should be constructed by calling either `fromExpression` or `fromLiteral`.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None` if
        it was not constructed from a literal.  Note that this is also `None`
        when this object corresponds to the literal `None`, in which case
        ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self.column = column
        self.value = value
        self._dtype = dtype

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Construct from an existing SQLAlchemy column expression and type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``.
        """
        # No Python literal backs an arbitrary expression, so ``value`` is
        # `None`.
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Construct from a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``.
        """
        dtype = type(value)
        if dtype is datetime:
            # `datetime` literals need dialect-specific rendering, which
            # `_TimestampLiteral` provides.
            column = _TimestampLiteral(value)
        else:
            # `Time` literals are given an explicit column type; for all
            # other literals SQLAlchemy picks the type itself.
            column = sqlalchemy.sql.literal(value, type_=ddl.AstropyTimeNsecTai if dtype is Time else None)
        return cls(column, value, dtype)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is not bool:
            return super().finish(node)  # will raise; just avoids duplicate error message
        return self.column

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is not self.dtype:
            raise ExpressionTypeError(
                f'Error in IN expression "{node}": left hand side has type '
                f"{dtype.__name__}, but item has type {self.dtype.__name__}."
            )
        literals.append(self.column)

380 

381 

class TimespanWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverter for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column expression
        (which may or may not be backed by multiple real columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Construct from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects in
            the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or other
            time expression, not a literal or bind time value.
        """
        assert begin.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        assert end.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A `Time`-typed converter without a Python value is an expression
        # (e.g. a column), which cannot be folded into a literal `Timespan`.
        if (begin.value is None and begin.dtype is Time) or (end.value is None and end.dtype is Time):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        return cls(TimespanReprClass.fromLiteral(Timespan(begin.value, end.value)))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents the overlap
        of this timespan with another.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.overlaps(other.timespan), bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents whether
        this timespan contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the containment operation; must have type
            `astropy.time.Time`.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean containment expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.contains(other.column), bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            "Timespans are not allowed in this context."
        )

488 

489 

class RangeWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverters for integer range literals.

    Range literals may only appear on the right-hand side of IN operations
    where the left-hand side operand is of type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start = start
        self.stop = stop
        self.step = step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is not int:
            raise ExpressionTypeError(
                f'Error in IN expression "{node}": range expressions '
                f"are only supported for int operands, not {dtype.__name__}."
            )
        ranges.append((self.start, self.stop, self.step))

530 

531 

UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of unary-operation callables directly stored in `DispatchTable`.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of binary-operation callables directly stored in `DispatchTable`.
"""

UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature for unary-operation callables that can work directly on SQLAlchemy
column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement
]
"""Signature for binary-operation callables that can work directly on
SQLAlchemy column expressions.
"""

551 

_F = TypeVar("_F")


def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """An adapter function for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary` that just returns the original function.

    Parameters
    ----------
    func
        Callable to register; returned unchanged.
    result : `type` or `None`
        Ignored; accepted so that the signature matches the other adapters.

    Returns
    -------
    func
        The same object that was passed in.
    """
    return func

560 

561 

def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """An adapter function for `DispatchTable.registerUnary` that converts a
    `UnaryColumnFunc` into a `UnaryFunc`, requiring the operand to be a
    `ScalarWhereClauseConverter`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable applied to the operand's SQLAlchemy column.
    result : `type`
        Python type recorded on the converter that wraps the output.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that accepts and returns converter objects.
    """

    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        return ScalarWhereClauseConverter.fromExpression(func(operand.column), dtype=result)

    return adapted

573 

574 

def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """An adapter function for `DispatchTable.registerBinary` that converts a
    `BinaryColumnFunc` into a `BinaryFunc`, requiring the operands to be
    `ScalarWhereClauseConverter` instances.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable applied to the SQLAlchemy columns of the two operands.
    result : `type`
        Python type recorded on the converter that wraps the output.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that accepts and returns converter objects.
    """

    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        return ScalarWhereClauseConverter.fromExpression(func(lhs.column, rhs.column), dtype=result)

    return adapted

587 

588 

class TimeBinaryOperator:
    """Callable that applies a binary operator to two time-valued scalar
    converters, coercing the operands to a common type first.

    Parameters
    ----------
    operator : `Callable`
        Binary callable applied to the SQLAlchemy columns of the (possibly
        coerced) operands.
    dtype : `type`
        Python type recorded on the converter that wraps the result.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        operands = [arg.column for arg in self.coerceTimes(lhs, rhs)]
        return ScalarWhereClauseConverter.fromExpression(self.operator(*operands), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type if
        necessary.

        If any of the arguments has `datetime` type then all other arguments
        are converted to `datetime` type as well.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects, their type can be one of
            `Time` or `datetime`. If coercion happens, then `Time` objects can
            only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            List of converters in the same order as they appear in argument
            list, some of them can be coerced to `datetime` type, non-coerced
            arguments are returned without any change.
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal."""
            if arg.dtype is not datetime:
                assert arg.value is not None, "Cannot coerce non-literals"
                assert arg.dtype is Time, "Cannot coerce non-Time literals"
                # Silence astropy (and, when importable, erfa) warnings
                # emitted while converting the `Time` value.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                    if erfa is not None:
                        warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                    dt = arg.value.to_datetime()
                arg = ScalarWhereClauseConverter.fromLiteral(dt)
            return arg

        # Coercion is only triggered when at least one operand is already a
        # `datetime`; otherwise the operands are passed through untouched.
        if any(arg.dtype is datetime for arg in args):
            return [_coerce(arg) for arg in args]
        else:
            return list(args)

640 

641 

642class DispatchTable: 

643 """An object that manages unary- and binary-operator type-dispatch tables 

644 for `WhereClauseConverter`. 

645 

646 Notes 

647 ----- 

648 A lot of the machinery in this class (and in the preceding function 

649 signature type aliases) is all in service of making the actual dispatch 

650 rules in the `build` method concise and easy to read, because that's where 

651 all of the important high-level logic lives. 

652 

653 Double-dispatch is a pain in Python, as it is in most languages; it's worth 

654 noting that I first tried the traditional visitor-pattern approach here, 

655 and it was *definitely* much harder to see the actual behavior. 

656 """ 

657 

def __init__(self) -> None:
    # Registered implementations, keyed by (operator, operand type) for
    # unary operations and (operator, lhs type, rhs type) for binary ones.
    self._unary: Dict[Tuple[str, type], UnaryFunc] = {}
    self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {}

661 

def registerUnary(
    self,
    operator: str,
    operand: Union[type, Iterable[type]],
    func: _F,
    *,
    result: Optional[type] = None,
    adapt: Any = True,
) -> None:
    """Register a unary operation for one or more types.

    Parameters
    ----------
    operator : `str`
        Operator as it appears in the string expression language.  Unary
        operations that are not mapped to operators may use their own
        arbitrary strings, as long as these are used consistently in
        `build` and `applyUnary`.
    operand : `type` or `Iterable` [ `type` ]
        Type or types for which this operation is implemented by the given
        ``func``.
    func : `Callable`
        Callable that implements the unary operation.  If
        ``adapt is True``, this should be a `UnaryColumnFunc`.  If
        ``adapt is False``, this should be a `UnaryFunc`.  Otherwise,
        this is whatever type is accepted as the first argument to
        ``adapt``.
    result : `type`, optional
        Type of the expression returned by this operation.  If not
        provided, the type of the operand is assumed.
    adapt : `bool` or `Callable`
        A callable that wraps ``func`` (the first argument) and ``result``
        (the second argument), returning a new callable with the
        signature of `UnaryFunc`.  `True` (default) and `False` invoke a
        default adapter or no adapter (see ``func`` docs).
    """
    if adapt is True:
        adapt = adaptUnaryColumnFunc
    elif adapt is False:
        adapt = adaptIdentity
    for item in ensure_iterable(operand):
        # Without an explicit result type the operation is assumed to
        # return the operand's own type.
        self._unary[operator, item] = adapt(func, result if result is not None else item)

704 

def registerBinary(
    self,
    operator: str,
    lhs: Union[type, Iterable[type]],
    func: _F,
    *,
    rhs: Optional[Union[type, Iterable[type]]] = None,
    result: Optional[type] = None,
    adapt: Any = True,
) -> None:
    """Register a binary operation for one or more types.

    Parameters
    ----------
    operator : `str`
        Operator as it appears in the string expression language.  Binary
        operations that are not mapped to operators may use their own
        arbitrary strings, as long as these are used consistently in
        `build` and `applyBinary`.
    lhs : `type` or `Iterable` [ `type` ]
        Left-hand side type or types for which this operation is
        implemented by the given ``func``.
    func : `Callable`
        Callable that implements the binary operation.  If
        ``adapt is True``, this should be a `BinaryColumnFunc`.  If
        ``adapt is False``, this should be a `BinaryFunc`.  Otherwise,
        this is whatever type is accepted as the first argument to
        ``adapt``.
    rhs : `type` or `Iterable` [ `type` ]
        Right-hand side type or types for which this operation is
        implemented by the given ``func``.  If multiple types, all
        combinations of ``lhs`` and ``rhs`` are registered.  If not
        provided, each element of ``lhs`` is assumed to be paired with
        itself, but mixed-type combinations are not registered.
    result : `type`, optional
        Type of the expression returned by this operation.  If not
        provided and ``rhs`` is also not provided, the type of the operand
        (``lhs``) is assumed.  If not provided and ``rhs`` *is* provided,
        then ``result=None`` will be forwarded to ``adapt``.
    adapt : `bool` or `Callable`
        A callable that wraps ``func`` (the first argument) and ``result``
        (the second argument), returning a new callable with the
        signature of `BinaryFunc`.  `True` (default) and `False` invoke a
        default adapter or no adapter (see ``func`` docs).
    """
    if adapt is True:
        adapt = adaptBinaryColumnFunc
    elif adapt is False:
        adapt = adaptIdentity
    # Materialize ``rhs`` once: it is walked for every ``lhs`` type, and a
    # one-shot iterable would otherwise be exhausted after the first pass.
    rhs_types = None if rhs is None else tuple(ensure_iterable(rhs))
    for lh in ensure_iterable(lhs):
        if rhs_types is None:
            # Same-type pairing; default the result type to the operand type.
            self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh)
        else:
            for rh in rhs_types:
                self._binary[operator, lh, rh] = adapt(func, result)

760 

def applyUnary(
    self,
    operator: str,
    operand: WhereClauseConverter,
) -> WhereClauseConverter:
    """Look up and apply the appropriate function for a registered unary
    operation.

    Parameters
    ----------
    operator : `str`
        Operator for the operation to apply.
    operand : `WhereClauseConverter`
        Operand, with ``operand.dtype`` and ``operator`` used to look up
        the appropriate function.

    Returns
    -------
    expression : `WhereClauseConverter`
        Converter instance that represents the operation, created by
        calling the registered function.

    Raises
    ------
    KeyError
        Raised if the operator and operand type combination is not
        recognized.
    """
    return self._unary[operator, operand.dtype](operand)

790 

def applyBinary(
    self,
    operator: str,
    lhs: WhereClauseConverter,
    rhs: WhereClauseConverter,
) -> WhereClauseConverter:
    """Look up and apply the appropriate function for a registered binary
    operation.

    The function is looked up from ``operator``, ``lhs.dtype`` and
    ``rhs.dtype``.

    Parameters
    ----------
    operator : `str`
        Operator for the operation to apply.
    lhs : `WhereClauseConverter`
        Left-hand side operand.
    rhs : `WhereClauseConverter`
        Right-hand side operand.

    Returns
    -------
    expression : `WhereClauseConverter`
        Converter instance that represents the operation, created by
        calling the registered function.

    Raises
    ------
    KeyError
        Raised if the operator and operand type combination is not
        recognized.
    """
    return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs)

822 

@classmethod
def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable:
    """Construct a dispatch table with all supported operators registered.

    Parameters
    ----------
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Timespan representation class; it is forwarded to
        `TimespanWhereClauseConverter.fromPair` when a ``PAIR`` is applied.

    Returns
    -------
    table : `DispatchTable`
        Newly created table holding every registration made below.
    """

    def asBool(expression: Any) -> Any:
        # Shared wrapper for the Timespan comparison callbacks below.
        return ScalarWhereClauseConverter.fromExpression(expression, dtype=bool)

    table = DispatchTable()

    # Symbol/function pairs, listed in registration order.
    comparisons = (
        ("=", operator.__eq__),
        ("!=", operator.__ne__),
        ("<", operator.__lt__),
        (">", operator.__gt__),
        ("<=", operator.__le__),
        (">=", operator.__ge__),
    )
    arithmetic = (
        ("+", operator.__add__),
        ("-", operator.__sub__),
        ("*", operator.__mul__),
        ("/", operator.__truediv__),
        ("%", operator.__mod__),
    )

    # Standard scalar unary and binary operators: just delegate to
    # SQLAlchemy operators.
    table.registerUnary("NOT", bool, sqlalchemy.sql.not_)
    table.registerUnary("+", (int, float), operator.__pos__)
    table.registerUnary("-", (int, float), operator.__neg__)
    table.registerBinary("AND", bool, sqlalchemy.sql.and_)
    table.registerBinary("OR", bool, sqlalchemy.sql.or_)
    for symbol, compare in comparisons:
        table.registerBinary(symbol, (int, float, str), compare, result=bool)
    for symbol, compute in arithmetic:
        table.registerBinary(symbol, (int, float), compute)

    # Comparisons between any mix of Time and datetime operands go through
    # TimeBinaryOperator and are registered with adapt=False.
    for symbol, compare in comparisons:
        table.registerBinary(
            symbol,
            (Time, datetime),
            TimeBinaryOperator(compare, bool),
            rhs=(Time, datetime),
            adapt=False,
        )

    # Equality and inequality against NULL map onto IS / IS NOT, with the
    # None operand allowed on either side.
    nullable = (int, float, str, Time, type(None))
    nullOnly = (type(None),)
    identityTests = (
        ("=", sqlalchemy.sql.expression.ColumnOperators.is_),
        ("!=", sqlalchemy.sql.expression.ColumnOperators.is_not),
    )
    for symbol, identity in identityTests:
        table.registerBinary(symbol, lhs=nullable, rhs=nullOnly, func=identity, result=bool)
        table.registerBinary(symbol, lhs=nullOnly, rhs=nullable, func=identity, result=bool)

    # Construct Timespan literals from 2-element tuples (A, B), where A and
    # B are each either Time or None.
    table.registerBinary(
        "PAIR",
        lhs=(Time, type(None)),
        rhs=(Time, type(None)),
        func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass),
        adapt=False,
    )

    # Less-than and greater-than between Timespans.
    table.registerBinary(
        "<",
        lhs=Timespan,
        func=lambda a, b: asBool(a.timespan < b.timespan),
        adapt=False,
    )
    table.registerBinary(
        ">",
        lhs=Timespan,
        func=lambda a, b: asBool(a.timespan > b.timespan),
        adapt=False,
    )

    # Less-than and greater-than between Timespans and Times.
    table.registerBinary(
        "<",
        lhs=Timespan,
        rhs=Time,
        func=lambda a, b: asBool(a.timespan < b.column),
        adapt=False,
    )
    table.registerBinary(
        ">",
        lhs=Timespan,
        rhs=Time,
        func=lambda a, b: asBool(a.timespan > b.column),
        adapt=False,
    )
    # With the Time on the left the comparison is flipped so that the
    # Timespan is still the left operand of the underlying expression.
    table.registerBinary(
        "<",
        lhs=Time,
        rhs=Timespan,
        func=lambda a, b: asBool(b.timespan > a.column),
        adapt=False,
    )
    table.registerBinary(
        ">",
        lhs=Time,
        rhs=Timespan,
        func=lambda a, b: asBool(b.timespan < a.column),
        adapt=False,
    )

    # OVERLAPS operator between Timespans.
    table.registerBinary(
        "OVERLAPS",
        lhs=Timespan,
        func=TimespanWhereClauseConverter.overlaps,
        adapt=False,
    )
    # OVERLAPS operator between Timespans and Time is equivalent to
    # "contains", but expression language only has OVERLAPS to keep it
    # simple.
    table.registerBinary(
        "OVERLAPS",
        lhs=Timespan,
        rhs=Time,
        func=TimespanWhereClauseConverter.contains,
        adapt=False,
    )
    table.registerBinary(
        "OVERLAPS",
        lhs=Time,
        rhs=Timespan,
        func=lambda a, b: TimespanWhereClauseConverter.contains(b, a),
        adapt=False,
    )
    return table

990 

991 

992class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]): 

993 """Implements TreeVisitor to convert the tree into 

994 `WhereClauseConverter` objects. 

995 

996 This class should be used only by the `convertExpressionToSql` function; 

997 external code should just call that function. 

998 

999 Parameters 

1000 ---------- 

1001 universe : `DimensionUniverse` 

1002 All known dimensions. 

1003 columns: `QueryColumns` 

1004 Struct that organizes the special columns known to the query 

1005 under construction. 

1006 elements: `NamedKeyMapping` 

1007 `DimensionElement` instances and their associated tables. 

1008 bind: `Mapping` 

1009 Mapping from string names to literal values that should be substituted 

1010 for those names when they appear (as identifiers) in the expression. 

1011 TimespanReprClass: `type`; subclass of `TimespanDatabaseRepresentation` 

1012 Class that encapsulates the representation of `Timespan` objects in 

1013 the database. 

1014 """ 

1015 

def __init__(
    self,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
):
    # Operator dispatch table, built for the given timespan representation.
    self._dispatch = DispatchTable.build(TimespanReprClass)
    self._TimespanReprClass = TimespanReprClass
    # Remaining arguments are stored as-is for use by the visit* methods.
    self.bind = bind
    self.elements = elements
    self.columns = columns
    self.universe = universe

1030 

def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitNumericLiteral
    number: Union[int, float]
    try:
        # Try the integer interpretation first.
        number = int(value)
    except ValueError:
        # Not an integer spelling; fall back to float.
        number = float(value)
    return ScalarWhereClauseConverter.fromLiteral(number)

1040 

def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitStringLiteral
    # The string is wrapped unchanged as a scalar literal.
    converter = ScalarWhereClauseConverter.fromLiteral(value)
    return converter

1044 

def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitTimeLiteral
    # The time value is wrapped unchanged as a scalar literal.
    converter = ScalarWhereClauseConverter.fromLiteral(value)
    return converter

1048 

def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitIdentifier

    # Names present in the bind mapping are checked first and resolve to
    # literal values.
    if name in self.bind:
        bound = self.bind[name]
        if isinstance(bound, Timespan):
            return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(bound))
        return ScalarWhereClauseConverter.fromLiteral(bound)

    # Next, the special constants recognized by the expression language.
    constant = categorizeConstant(name)
    if constant is ExpressionConstant.NULL:
        return ScalarWhereClauseConverter.fromLiteral(None)
    if constant is ExpressionConstant.INGEST_DATE:
        assert self.columns.datasets is not None
        assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
        ingestDate = _TimestampColumnElement(self.columns.datasets.ingestDate)
        return ScalarWhereClauseConverter.fromExpression(ingestDate, datetime)
    assert constant is None, "Check for enum values should be exhaustive."

    # Otherwise the name refers to a dimension element, optionally
    # qualified with a column name.
    element, column = categorizeElementId(self.universe, name)
    if column is None:
        # No column given: use the key column of the dimension.
        assert isinstance(element, Dimension)
        return ScalarWhereClauseConverter.fromExpression(
            self.columns.getKeyColumn(element), element.primaryKey.getPythonType()
        )
    if column == TimespanDatabaseRepresentation.NAME:
        if element.temporal is None:
            raise ExpressionTypeError(
                f"No timespan column exists for non-temporal element '{element.name}'."
            )
        return TimespanWhereClauseConverter(self.columns.timespans[element])
    standardFields = element.RecordClass.fields.standard
    if column not in standardFields.names:
        raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
    return ScalarWhereClauseConverter.fromExpression(
        self.elements[element].columns[column],
        standardFields[column].getPythonType(),
    )

1087 

def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitUnaryOp
    try:
        converted = self._dispatch.applyUnary(operator, operand)
    except KeyError:
        # A KeyError from the dispatch call is reported as a type error;
        # the KeyError itself is suppressed from the traceback.
        message = f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
        raise ExpressionTypeError(message) from None
    return converted

1096 

def visitBinaryOp(
    self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitBinaryOp
    try:
        converted = self._dispatch.applyBinary(operator, lhs, rhs)
    except KeyError:
        # A KeyError from the dispatch call is reported as a type error;
        # the KeyError itself is suppressed from the traceback.
        message = (
            f"Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary "
            f'operator {operator} in "{node}".'
        )
        raise ExpressionTypeError(message) from None
    return converted

1108 

def visitIsIn(
    self,
    lhs: WhereClauseConverter,
    values: List[WhereClauseConverter],
    not_in: bool,
    node: Node,
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitIsIn
    #
    # `values` is a list of literals and ranges, range is represented
    # by a tuple (start, stop, stride). We need to transform range into
    # some SQL construct, simplest would be to generate a set of literals
    # and add it to the same list but it could become too long. What we
    # do here is to introduce some large limit on the total number of
    # items in IN() and if range exceeds that limit then we do something
    # like:
    #
    #    X IN (1, 2, 3)
    #    OR
    #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
    #
    # or for NOT IN case
    #
    #    NOT (X IN (1, 2, 3)
    #         OR
    #         (X BETWEEN START AND STOP
    #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
    #
    if not isinstance(lhs, ScalarWhereClauseConverter):
        raise ExpressionTypeError(f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".')
    max_in_items = 1000
    clauses: List[sqlalchemy.sql.ColumnElement] = []
    # Split the list into literals and ranges
    literals: List[sqlalchemy.sql.ColumnElement] = []
    ranges: List[Tuple[int, int, int]] = []
    for value in values:
        value.categorizeForIn(literals, ranges, lhs.dtype, node)
    # Handle ranges (maybe by converting them to literals).
    for start, stop, stride in ranges:
        # `stop` is inclusive.  Using len() of the range gives the exact
        # number of members for any stride (and zero for an empty range),
        # so the limit on IN() items is not exceeded by a miscount.
        members = range(start, stop + 1, stride)
        if len(literals) + len(members) > max_in_items:
            # X BETWEEN START AND STOP
            #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
            expr = lhs.column.between(start, stop)
            if stride != 1:
                expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
            clauses.append(expr)
        else:
            # add all values to literal list
            literals += [sqlalchemy.sql.literal(member) for member in members]
    # Handle literals.
    if literals:
        # add IN() in front of BETWEENs
        clauses.insert(0, lhs.column.in_(literals))
    # Assemble the full expression.  The leading false() keeps or_() valid
    # when no clause was produced at all (e.g. only empty ranges were
    # given), in which case the membership test is simply false; it is
    # dropped by SQLAlchemy when real clauses are present.
    expr = sqlalchemy.sql.or_(sqlalchemy.sql.false(), *clauses)
    if not_in:
        expr = sqlalchemy.sql.not_(expr)
    return ScalarWhereClauseConverter.fromExpression(expr, bool)

1168 

def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitParens
    # The already-converted inner expression is passed through unchanged.
    inner = expression
    return inner

1172 

def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
    # Docstring inherited from base class
    # Only 2-element tuples are accepted; they are dispatched as a "PAIR"
    # binary operation.
    if len(items) != 2:
        raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
    try:
        return self._dispatch.applyBinary("PAIR", items[0], items[1])
    except KeyError:
        # Suppress the internal KeyError from the traceback, consistent
        # with visitUnaryOp and visitBinaryOp.
        raise ExpressionTypeError(
            f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
            '(Note that date/time strings must be preceded by "T" to be recognized).'
        ) from None

1184 

def visitRangeLiteral(
    self, start: int, stop: int, stride: Optional[int], node: Node
) -> WhereClauseConverter:
    # Docstring inherited from TreeVisitor.visitRangeLiteral
    # stride can be None which means the same as 1.
    step = stride if stride else 1
    return RangeWhereClauseConverter(start, stop, step)

1191 

def visitPointNode(
    self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
) -> WhereClauseConverter:
    # Docstring inherited from base class

    # Placeholder for a future extension: the syntax is accepted, but
    # actual use is not supported yet, so this always raises.
    message = "POINT() function is not supported yet"
    raise NotImplementedError(message)