Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "convertExpressionToSql", 

26 "ExpressionTypeError", 

27) 

28 

29from abc import ABC, abstractmethod 

30from datetime import datetime 

31import operator 

32from typing import ( 

33 Any, 

34 Callable, 

35 Dict, 

36 Iterable, 

37 List, 

38 Mapping, 

39 Optional, 

40 Tuple, 

41 Type, 

42 TYPE_CHECKING, 

43 TypeVar, 

44 Union, 

45) 

46import warnings 

47 

48from astropy.time import Time 

49import astropy.utils.exceptions 

50import sqlalchemy 

51from sqlalchemy.ext.compiler import compiles 

52from sqlalchemy.sql.expression import func 

53 

54from lsst.utils.iteration import ensure_iterable 

55from ....core import ( 

56 ddl, 

57 Dimension, 

58 DimensionElement, 

59 DimensionUniverse, 

60 NamedKeyMapping, 

61 Timespan, 

62 TimespanDatabaseRepresentation, 

63) 

64from .parser import Node, TreeVisitor 

65from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant 

66 

67# As of astropy 4.2, the erfa interface is shipped independently and 

68# ErfaWarning is no longer an AstropyWarning 

69try: 

70 import erfa 

71except ImportError: 

72 erfa = None 

73 

# Imported for annotations only; the guard keeps this relative import from
# running at import time (annotations are lazy via ``from __future__ import
# annotations``).
if TYPE_CHECKING:
    from .._structs import QueryColumns

76 

77 

def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression tree into a SQLAlchemy expression.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in the
        expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    # Visiting the tree yields a converter object for the root node.
    rootConverter = tree.visit(
        WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    )
    # ``finish`` hands back the SQL for the root converter; ``tree`` is passed
    # so that it can be quoted in any error message.
    return rootConverter.finish(tree)

120 

121 

class ExpressionTypeError(TypeError):
    """Error signalling that the operand types in a query expression do not
    fit the operator (or other syntax) they are used with.
    """

126 

127 

class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Custom ColumnElement standing in for a TIMESTAMP column or literal
    inside an expression.

    SQLite keeps timestamps as strings, and plain string comparison of those
    can misbehave; wrapping them in DATETIME() makes comparisons more
    reliable. PostgreSQL behaves better when string literals are prefixed
    with TIMESTAMP.

    Only WHERE-clause expressions go through this mechanism. Values of
    TIMESTAMP columns returned from queries still use the standard machinery
    and come back as `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`, optional
        Column expression to wrap.
    literal : `datetime`, optional
        Literal timestamp to wrap. The dialect-specific compilers assert
        that at least one of ``column`` and ``literal`` is not `None`.
    """

    def __init__(self, column: Optional[sqlalchemy.sql.ColumnElement] = None,
                 literal: Optional[datetime] = None):
        super().__init__()
        # Stored as-is; the per-dialect compile functions decide how to
        # render whichever one is set.
        self._column, self._literal = column, literal

146 

147 

@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a `_TimestampColumnElement` for the SQLite dialect.

    The wrapped column, or the wrapped literal when no column is set, is
    passed through SQLite's ``datetime()`` function before being compiled.

    Parameters
    ----------
    element
        The `_TimestampColumnElement` being compiled.
    compiler
        SQLAlchemy compiler used to process the generated expression.
    **kw
        Extra keyword arguments forwarded unchanged to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    # The column takes precedence; the literal is only used when no column is set.
    if element._column is None:
        operand = sqlalchemy.sql.literal(element._literal)
    else:
        operand = element._column
    return compiler.process(func.datetime(operand), **kw)

160 

161 

@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Mapping[str, Any]) -> str:
    """Render a `_TimestampColumnElement` for the PostgreSQL dialect.

    A wrapped column is compiled unchanged. A wrapped literal is formatted
    as an ISO string (space separator, microsecond precision), compiled as a
    literal, and prefixed with the ``TIMESTAMP`` keyword.

    Parameters
    ----------
    element
        The `_TimestampColumnElement` being compiled.
    compiler
        SQLAlchemy compiler used to process the generated expression.
    **kw
        Extra keyword arguments forwarded unchanged to ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    column = element._column
    if column is not None:
        return compiler.process(column, **kw)
    text = element._literal.isoformat(sep=" ", timespec="microseconds")
    bound = sqlalchemy.sql.literal(text)
    return "TIMESTAMP " + compiler.process(bound, **kw)

174 

175 

class WhereClauseConverter(ABC):
    """Abstract interface for the objects that turn nodes of a butler query
    expression tree into SQLAlchemy expression objects.

    Instances are produced and consumed by `WhereClauseConverterVisitor`,
    which itself is created and used only by `convertExpressionToSql`.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete conversion of this [boolean] expression and return the
        resulting SQLAlchemy expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.
            This base implementation raises unconditionally; subclasses that
            can represent a boolean are expected to override it.
        """
        raise ExpressionTypeError(f'Expression "{node}" has type {self.dtype}, not bool.')

    @property
    @abstractmethod
    def dtype(self) -> type:
        """Python type of the expression tree node this converter is
        associated with (`type`).

        For literals and bind values this should be the object's exact type.
        For expressions that map to database entities or expressions it
        should be the type SQLAlchemy's converter mechanism produces when
        rows are returned from the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression when it is one of the elements on the
        right-hand side of an IN expression.

        Every implementation must do one of the following:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            ``(start, stop, step)`` tuples describing ranges that the
            left-hand side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand of the IN expression.
            ``literals`` should only be appended to if
            ``self.dtype is dtype``, and ``ranges`` only if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of
            an IN expression, or if it is incompatible with the left-hand
            side type.
        """
        raise NotImplementedError()

263 

264 

class ScalarWhereClauseConverter(WhereClauseConverter):
    """Main WhereClauseConverter implementation, covering expressions that
    a single `sqlalchemy.sql.ColumnElement` instance can always represent
    directly.

    Use `fromExpression` or `fromLiteral` to construct instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None`
        if it was not constructed from a literal. This is also `None` when
        the object corresponds to the literal `None`, in which case
        ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self._dtype = dtype
        self.value = value
        self.column = column

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of a known type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``; its ``value`` is
            `None`.
        """
        converter = cls(column, None, dtype)
        return converter

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``; its ``dtype`` is
            ``type(value)``.
        """
        dtype = type(value)
        if dtype is datetime:
            # datetime literals get the dialect-aware timestamp element.
            return cls(_TimestampColumnElement(literal=value), value, dtype)
        # astropy Time literals need an explicit SQL type; everything else
        # lets SQLAlchemy infer one.
        sqlType = ddl.AstropyTimeNsecTai if dtype is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sqlType), value, dtype)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # Not boolean: the base class raises, which keeps the error message
        # defined in a single place.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f'{dtype.__name__}, but item has type {self.dtype.__name__}.'
        )

353 

354 

class TimespanWhereClauseConverter(WhereClauseConverter):
    """WhereClauseConverter implementation for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column
        expression (which may or may not be backed by multiple real
        columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Build a timespan converter from two literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        assert begin.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        assert end.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        for bound in (begin, end):
            # A Time-typed converter that carries no Python value was built
            # from an expression, not from a literal or bind value.
            if bound.dtype is Time and bound.value is None:
                raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        literal = Timespan(begin.value, end.value)
        return cls(TimespanReprClass.fromLiteral(literal))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression for the overlap of this
        timespan with another one.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        sql = self.timespan.overlaps(other.timespan)
        return ScalarWhereClauseConverter.fromExpression(sql, bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Build a boolean converter expression for whether this timespan
        contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the contains operation; must have `Time` dtype.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean contains expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        sql = self.timespan.contains(other.column)
        return ScalarWhereClauseConverter.fromExpression(sql, bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        # Timespans are never valid IN elements, whatever the LHS type is.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            'Timespans are not allowed in this context.'
        )

460 

461 

class RangeWhereClauseConverter(WhereClauseConverter):
    """WhereClauseConverter implementation for integer range literals.

    A range literal is only meaningful on the right-hand side of an IN
    operation whose left-hand side operand has type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            bounds = (self.start, self.stop, self.step)
            ranges.append(bounds)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f'are only supported for int operands, not {dtype.__name__}.'
        )

501 

502 

# Converter-level signatures: callables of these types are what
# `DispatchTable` stores directly.
UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of unary-operation callables directly stored in `DispatchTable`.
"""

BinaryFunc = Callable[
    [WhereClauseConverter, WhereClauseConverter],
    WhereClauseConverter,
]
"""Signature of binary-operation callables directly stored in `DispatchTable`.
"""

# Column-level signatures: callables of these types work on raw SQLAlchemy
# column expressions and are wrapped by the ``adapt*ColumnFunc`` helpers.
UnaryColumnFunc = Callable[[sqlalchemy.sql.ColumnElement], sqlalchemy.sql.ColumnElement]
"""Signature for unary-operation callables that can work directly on SQLAlchemy
column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Signature for binary-operation callables that can work directly on
SQLAlchemy column expressions.
"""

# Placeholder type used to annotate ``func`` arguments whose exact callable
# type depends on the adapter in use.
_F = TypeVar("_F")

523 

524 

def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    Parameters
    ----------
    func
        Callable to register.
    result : `type` or `None`
        Ignored; accepted only so the signature matches the other adapters.

    Returns
    -------
    func
        The very same object that was passed in.
    """
    return func

530 

531 

def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that lifts a
    `UnaryColumnFunc` to a `UnaryFunc`.

    Parameters
    ----------
    func : `UnaryColumnFunc`
        Callable applied to the operand's SQLAlchemy column expression.
    result : `type`
        Python type assigned to the converter that wraps the result.

    Returns
    -------
    adapted : `UnaryFunc`
        Callable that requires its operand to be a
        `ScalarWhereClauseConverter` and returns a new one.
    """
    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        sql = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(sql, dtype=result)

    return adapted

541 

542 

def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that lifts a
    `BinaryColumnFunc` to a `BinaryFunc`.

    Parameters
    ----------
    func : `BinaryColumnFunc`
        Callable applied to the SQLAlchemy column expressions of the two
        operands, left-hand side first.
    result : `type`
        Python type assigned to the converter that wraps the result.

    Returns
    -------
    adapted : `BinaryFunc`
        Callable that requires both operands to be
        `ScalarWhereClauseConverter` instances and returns a new one.
    """
    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        sql = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(sql, dtype=result)

    return adapted

553 

554 

class TimeBinaryOperator:
    """Callable implementing a binary operation between two time-like scalar
    converters, coercing `Time` literals to `datetime` when the operand
    types are mixed.

    Parameters
    ----------
    operator : `Callable`
        Binary callable applied to the two operands' column expressions.
    dtype : `type`
        Python type assigned to the converter that wraps the result.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        # Bring both operands to a common time representation first, then
        # apply the operator to their column expressions.
        columns = tuple(converter.column for converter in self.coerceTimes(lhs, rhs))
        return ScalarWhereClauseConverter.fromExpression(self.operator(*columns), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type
        if necessary.

        As soon as any argument has `datetime` type, every other argument is
        converted to `datetime` type too.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects; their type can be one of
            `Time` or `datetime`. If coercion happens, then `Time` objects
            can only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            Converters in the same order as they appear in the argument
            list. Some may have been coerced to `datetime` type; the others
            are returned unchanged.
        """

        def _asDatetime(converter: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal.
            """
            if converter.dtype is datetime:
                return converter
            assert converter.value is not None, "Cannot coerce non-literals"
            assert converter.dtype is Time, "Cannot coerce non-Time literals"
            # Silence astropy (and, when importable, erfa) warnings raised
            # while converting the Time value.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                if erfa is not None:
                    warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                converted = converter.value.to_datetime()
            return ScalarWhereClauseConverter.fromLiteral(converted)

        if not any(arg.dtype is datetime for arg in args):
            # Nothing is datetime-typed, so no coercion is needed.
            return list(args)
        return [_asDatetime(arg) for arg in args]

608 

609 

610class DispatchTable: 

611 """An object that manages unary- and binary-operator type-dispatch tables 

612 for `WhereClauseConverter`. 

613 

614 Notes 

615 ----- 

616 A lot of the machinery in this class (and in the preceding function 

617 signature type aliases) is all in service of making the actual dispatch 

618 rules in the `build` method concise and easy to read, because that's where 

619 all of the important high-level logic lives. 

620 

621 Double-dispatch is a pain in Python, as it is in most languages; it's worth 

622 noting that I first tried the traditional visitor-pattern approach here, 

623 and it was *definitely* much harder to see the actual behavior. 

624 """ 

def __init__(self) -> None:
    # Both lookup tables start out empty; entries are added through
    # ``registerUnary`` and ``registerBinary``.
    # Keyed by (operator, operand type).
    self._unary: Dict[Tuple[str, type], UnaryFunc] = dict()
    # Keyed by (operator, lhs type, rhs type).
    self._binary: Dict[Tuple[str, type, type], BinaryFunc] = dict()

628 

def registerUnary(
    self,
    operator: str,
    operand: Union[type, Iterable[type]],
    func: _F,
    *,
    result: Optional[type] = None,
    adapt: Any = True,
) -> None:
    """Add a unary operation to the table for one or more operand types.

    Parameters
    ----------
    operator : `str`
        Operator as it appears in the string expression language. Unary
        operations that are not mapped to operators may use their own
        arbitrary strings, as long as these are used consistently in
        `build` and `applyUnary`.
    operand : `type` or `Iterable` [ `type` ]
        Type or types for which this operation is implemented by the given
        ``func``.
    func : `Callable`
        Callable that implements the unary operation. If
        ``adapt is True``, this should be a `UnaryColumnFunc`. If
        ``adapt is False``, this should be a `UnaryFunc`. Otherwise, this
        is whatever type is accepted as the first argument to ``adapt``.
    result : `type`, optional
        Type of the expression returned by this operation. If not
        provided, the type of the operand is assumed.
    adapt : `bool` or `Callable`
        A callable that wraps ``func`` (the first argument) and ``result``
        (the second argument), returning a new callable with the signature
        of `UnaryFunc`. `True` (default) selects `adaptUnaryColumnFunc`
        and `False` selects `adaptIdentity`.
    """
    # Resolve the boolean shortcuts to real adapter callables.
    if adapt is True:
        adapter = adaptUnaryColumnFunc
    elif adapt is False:
        adapter = adaptIdentity
    else:
        adapter = adapt
    for operandType in ensure_iterable(operand):
        # Without an explicit result type the operation preserves its
        # operand type.
        resultType = operandType if result is None else result
        self._unary[operator, operandType] = adapter(func, resultType)

674 

def registerBinary(
    self,
    operator: str,
    lhs: Union[type, Iterable[type]],
    func: _F,
    *,
    rhs: Optional[Union[type, Iterable[type]]] = None,
    result: Optional[type] = None,
    adapt: Any = True,
) -> None:
    """Add a binary operation to the table for one or more type pairs.

    Parameters
    ----------
    operator : `str`
        Operator as it appears in the string expression language. Binary
        operations that are not mapped to operators may use their own
        arbitrary strings, as long as these are used consistently in
        `build` and `applyBinary`.
    lhs : `type` or `Iterable` [ `type` ]
        Left-hand side type or types for which this operation is
        implemented by the given ``func``.
    func : `Callable`
        Callable that implements the binary operation. If
        ``adapt is True``, this should be a `BinaryColumnFunc`. If
        ``adapt is False``, this should be a `BinaryFunc`. Otherwise, this
        is whatever type is accepted as the first argument to ``adapt``.
    rhs : `type` or `Iterable` [ `type` ]
        Right-hand side type or types for which this operation is
        implemented by the given ``func``. If multiple types, all
        combinations of ``lhs`` and ``rhs`` are registered. If not
        provided, each element of ``lhs`` is assumed to be paired with
        itself, but mixed-type combinations are not registered.
    result : `type`, optional
        Type of the expression returned by this operation. If not provided
        and ``rhs`` is also not provided, the type of the operand (``lhs``)
        is assumed. If not provided and ``rhs`` *is* provided, then
        ``result=None`` will be forwarded to ``adapt``.
    adapt : `bool` or `Callable`
        A callable that wraps ``func`` (the first argument) and ``result``
        (the second argument), returning a new callable with the signature
        of `BinaryFunc`. `True` (default) selects `adaptBinaryColumnFunc`
        and `False` selects `adaptIdentity`.
    """
    # Resolve the boolean shortcuts to real adapter callables.
    if adapt is True:
        adapter = adaptBinaryColumnFunc
    elif adapt is False:
        adapter = adaptIdentity
    else:
        adapter = adapt
    if rhs is None:
        # Same-type pairs only; the result type defaults to the operand type.
        for operandType in ensure_iterable(lhs):
            resultType = operandType if result is None else result
            self._binary[operator, operandType, operandType] = adapter(func, resultType)
        return
    # Explicit rhs: register every (lhs, rhs) combination and forward
    # ``result`` untouched, even when it is None.
    for leftType in ensure_iterable(lhs):
        for rightType in ensure_iterable(rhs):
            self._binary[operator, leftType, rightType] = adapter(func, result)

730 

def applyUnary(
    self,
    operator: str,
    operand: WhereClauseConverter,
) -> WhereClauseConverter:
    """Find the function registered for a unary operation and call it.

    Parameters
    ----------
    operator : `str`
        Operator for the operation to apply.
    operand : `WhereClauseConverter`
        Operand, with ``operand.dtype`` and ``operator`` used to look up
        the appropriate function.

    Returns
    -------
    expression : `WhereClauseConverter`
        Converter instance that represents the operation, created by
        calling the registered function.

    Raises
    ------
    KeyError
        Raised if the operator and operand type combination is not
        recognized.
    """
    key = (operator, operand.dtype)
    handler = self._unary[key]
    return handler(operand)

760 

def applyBinary(
    self,
    operator: str,
    lhs: WhereClauseConverter,
    rhs: WhereClauseConverter,
) -> WhereClauseConverter:
    """Find the function registered for a binary operation and call it.

    The lookup key is ``operator`` together with ``lhs.dtype`` and
    ``rhs.dtype``.

    Parameters
    ----------
    operator : `str`
        Operator for the operation to apply.
    lhs : `WhereClauseConverter`
        Left-hand side operand.
    rhs : `WhereClauseConverter`
        Right-hand side operand.

    Returns
    -------
    expression : `WhereClauseConverter`
        Converter instance that represents the operation, created by
        calling the registered function.

    Raises
    ------
    KeyError
        Raised if the operator and operand type combination is not
        recognized.
    """
    key = (operator, lhs.dtype, rhs.dtype)
    handler = self._binary[key]
    return handler(lhs, rhs)

792 

793 @classmethod 

794 def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable: 

795 table = DispatchTable() 

796 # Standard scalar unary and binary operators: just delegate to 

797 # SQLAlchemy operators. 

798 table.registerUnary("NOT", bool, sqlalchemy.sql.not_) 

799 table.registerUnary("+", (int, float), operator.__pos__) 

800 table.registerUnary("-", (int, float), operator.__neg__) 

801 table.registerBinary("AND", bool, sqlalchemy.sql.and_) 

802 table.registerBinary("OR", bool, sqlalchemy.sql.or_) 

803 table.registerBinary("=", (int, float, str), operator.__eq__, result=bool) 

804 table.registerBinary("!=", (int, float, str), operator.__ne__, result=bool) 

805 table.registerBinary("<", (int, float, str), operator.__lt__, result=bool) 

806 table.registerBinary(">", (int, float, str), operator.__gt__, result=bool) 

807 table.registerBinary("<=", (int, float, str), operator.__le__, result=bool) 

808 table.registerBinary(">=", (int, float, str), operator.__ge__, result=bool) 

809 table.registerBinary("+", (int, float), operator.__add__) 

810 table.registerBinary("-", (int, float), operator.__sub__) 

811 table.registerBinary("*", (int, float), operator.__mul__) 

812 table.registerBinary("/", (int, float), operator.__truediv__) 

813 table.registerBinary("%", (int, float), operator.__mod__) 

814 table.registerBinary("=", (Time, datetime), TimeBinaryOperator(operator.__eq__, bool), 

815 rhs=(Time, datetime), adapt=False) 

816 table.registerBinary("!=", (Time, datetime), TimeBinaryOperator(operator.__ne__, bool), 

817 rhs=(Time, datetime), adapt=False) 

818 table.registerBinary("<", (Time, datetime), TimeBinaryOperator(operator.__lt__, bool), 

819 rhs=(Time, datetime), adapt=False) 

820 table.registerBinary(">", (Time, datetime), TimeBinaryOperator(operator.__gt__, bool), 

821 rhs=(Time, datetime), adapt=False) 

822 table.registerBinary("<=", (Time, datetime), TimeBinaryOperator(operator.__le__, bool), 

823 rhs=(Time, datetime), adapt=False) 

824 table.registerBinary(">=", (Time, datetime), TimeBinaryOperator(operator.__ge__, bool), 

825 rhs=(Time, datetime), adapt=False) 

826 table.registerBinary( 

827 "=", 

828 lhs=(int, float, str, Time, type(None)), 

829 rhs=(type(None),), 

830 func=sqlalchemy.sql.expression.ColumnOperators.is_, 

831 result=bool, 

832 ) 

833 table.registerBinary( 

834 "=", 

835 lhs=(type(None),), 

836 rhs=(int, float, str, Time, type(None)), 

837 func=sqlalchemy.sql.expression.ColumnOperators.is_, 

838 result=bool, 

839 ) 

840 table.registerBinary( 

841 "!=", 

842 lhs=(int, float, str, Time, type(None)), 

843 rhs=(type(None),), 

844 func=sqlalchemy.sql.expression.ColumnOperators.is_not, 

845 result=bool, 

846 ) 

847 table.registerBinary( 

848 "!=", 

849 lhs=(type(None),), 

850 rhs=(int, float, str, Time, type(None)), 

851 func=sqlalchemy.sql.expression.ColumnOperators.is_not, 

852 result=bool, 

853 ) 

854 # Construct Timespan literals from 2-element tuples (A, B), where A and 

855 # B are each either Time or None. 

856 table.registerBinary( 

857 "PAIR", 

858 lhs=(Time, type(None)), 

859 rhs=(Time, type(None)), 

860 func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass), 

861 adapt=False, 

862 ) 

863 # Less-than and greater-than between Timespans. 

864 table.registerBinary( 

865 "<", 

866 lhs=Timespan, 

867 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool), 

868 adapt=False, 

869 ) 

870 table.registerBinary( 

871 ">", 

872 lhs=Timespan, 

873 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool), 

874 adapt=False, 

875 ) 

876 # Less-than and greater-than between Timespans and Times. 

877 table.registerBinary( 

878 "<", 

879 lhs=Timespan, 

880 rhs=Time, 

881 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool), 

882 adapt=False, 

883 ) 

884 table.registerBinary( 

885 ">", 

886 lhs=Timespan, 

887 rhs=Time, 

888 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool), 

889 adapt=False, 

890 ) 

891 table.registerBinary( 

892 "<", 

893 lhs=Time, 

894 rhs=Timespan, 

895 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool), 

896 adapt=False, 

897 ) 

898 table.registerBinary( 

899 ">", 

900 lhs=Time, 

901 rhs=Timespan, 

902 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool), 

903 adapt=False, 

904 ) 

905 # OVERLAPS operator between Timespans. 

906 table.registerBinary( 

907 "OVERLAPS", 

908 lhs=Timespan, 

909 func=TimespanWhereClauseConverter.overlaps, 

910 adapt=False, 

911 ) 

912 # OVERLAPS operator between Timespans and Time is equivalent to 

913 # "contains", but expression language only has OVERLAPS to keep it 

914 # simple. 

915 table.registerBinary( 

916 "OVERLAPS", 

917 lhs=Timespan, 

918 rhs=Time, 

919 func=TimespanWhereClauseConverter.contains, 

920 adapt=False, 

921 ) 

922 table.registerBinary( 

923 "OVERLAPS", 

924 lhs=Time, 

925 rhs=Timespan, 

926 func=lambda a, b: TimespanWhereClauseConverter.contains(b, a), 

927 adapt=False, 

928 ) 

929 return table 

930 

931 

class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns: `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements: `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind: `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass: `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """
    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Operator dispatch table, built for this timespan representation;
        # used by the unary/binary/tuple visit methods below.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so
        # that integer-looking literals stay integers.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values, then expression constants, then
        # dimension element columns / dimension keys.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(column=self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                # Timespan columns only exist for temporal elements.
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column given: the identifier refers to the dimension's key.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element),
                element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # A failed dispatch lookup is reported as a type error in the
            # user's expression; the KeyError itself is suppressed.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            raise ExpressionTypeError(
                f'Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary '
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(
                f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".'
            )
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # stop is inclusive; let `range` do the counting so that strided
            # and empty (stop < start) ranges are counted exactly.
            members = range(start, stop + 1, stride)
            if len(literals) + len(members) > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.column.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list
                literals.extend(sqlalchemy.sql.literal(member) for member in members)
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.  The leading false() keeps the OR
        # well-defined when there are no clauses at all (e.g. only empty
        # ranges were given): membership in an empty set is false.  It has no
        # effect on the result when other clauses are present.
        expr = sqlalchemy.sql.or_(sqlalchemy.sql.false(), *clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        #
        # Only 2-element tuples are recognized; they are dispatched as the
        # "PAIR" binary operation.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError, as the other operator visitors
            # do, so users only see the expression-level error.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")