Coverage for python/lsst/daf/butler/registry/queries/expressions/convert.py: 26%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

311 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "convertExpressionToSql", 

26 "ExpressionTypeError", 

27) 

28 

29import operator 

30import warnings 

31from abc import ABC, abstractmethod 

32from datetime import datetime 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 Dict, 

38 Iterable, 

39 List, 

40 Mapping, 

41 Optional, 

42 Tuple, 

43 Type, 

44 TypeVar, 

45 Union, 

46) 

47 

48import astropy.utils.exceptions 

49import sqlalchemy 

50from astropy.time import Time 

51from lsst.utils.iteration import ensure_iterable 

52from sqlalchemy.ext.compiler import compiles 

53from sqlalchemy.sql.expression import func 

54 

55from ....core import ( 

56 Dimension, 

57 DimensionElement, 

58 DimensionUniverse, 

59 NamedKeyMapping, 

60 Timespan, 

61 TimespanDatabaseRepresentation, 

62 ddl, 

63) 

64from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId 

65from .parser import Node, TreeVisitor 

66 

67# As of astropy 4.2, the erfa interface is shipped independently and 

68# ErfaWarning is no longer an AstropyWarning 

69try: 

70 import erfa 

71except ImportError: 

72 erfa = None 

73 

74if TYPE_CHECKING: 74 ↛ 75line 74 didn't jump to line 75, because the condition on line 74 was never true

75 from .._structs import QueryColumns 

76 

77 

def convertExpressionToSql(
    tree: Node,
    universe: DimensionUniverse,
    columns: QueryColumns,
    elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
    bind: Mapping[str, Any],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> sqlalchemy.sql.ColumnElement:
    """Translate a parsed query expression tree into a SQLAlchemy expression.

    Parameters
    ----------
    tree : `Node`
        Root node of the query expression tree.
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be
        substituted for those names when they appear (as identifiers) in
        the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.

    Returns
    -------
    sql : `sqlalchemy.sql.ColumnElement`
        A boolean SQLAlchemy column expression.

    Raises
    ------
    ExpressionTypeError
        Raised if the operands in a query expression operation are
        incompatible with the operator, or if the expression does not
        evaluate to a boolean.
    """
    # Visiting the tree produces a converter for the root node; ``finish``
    # unwraps it into the final SQLAlchemy expression.
    rootConverter = tree.visit(
        WhereClauseConverterVisitor(universe, columns, elements, bind, TimespanReprClass)
    )
    return rootConverter.finish(tree)

120 

121 

class ExpressionTypeError(TypeError):
    """Error signalling that the operand types found in a query expression
    do not fit the operators (or other syntax) they are used with.
    """

    pass

126 

127 

class _TimestampColumnElement(sqlalchemy.sql.ColumnElement):
    """Custom ColumnElement that stands in for a TIMESTAMP column or literal
    inside an expression.

    SQLite keeps timestamps as strings, and comparing those strings directly
    is not always reliable; wrapping them in DATETIME() makes comparisons
    more robust. PostgreSQL behaves better when string literals carry an
    explicit TIMESTAMP prefix.

    Only WHERE-clause expressions go through this mechanism. Values of
    TIMESTAMP columns returned from queries still use the standard
    conversion and arrive as `datetime` instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`, optional
        Column expression to wrap.
    literal : `datetime`, optional
        Literal timestamp value to wrap.

    Notes
    -----
    The dialect-specific compilation functions registered for this class
    assert that at least one of ``column`` and ``literal`` is set, and give
    ``column`` priority when both are.
    """

    def __init__(
        self,
        column: Optional[sqlalchemy.sql.ColumnElement] = None,
        literal: Optional[datetime] = None,
    ):
        super().__init__()
        # Stored as-is; interpreted only when the element is compiled.
        self._column = column
        self._literal = literal

148 

149 

@compiles(_TimestampColumnElement, "sqlite")
def compile_timestamp_sqlite(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for SQLite.

    The wrapped column or literal is passed through the SQL ``datetime()``
    function, so both sides of a comparison are compared in the same form.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; at least one of its ``_column`` and
        ``_literal`` attributes must be set, with ``_column`` taking
        priority.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    if element._column is not None:
        operand = element._column
    else:
        # Turn the Python value into a bound literal before wrapping it.
        operand = sqlalchemy.sql.literal(element._literal)
    return compiler.process(func.datetime(operand), **kw)

162 

163 

@compiles(_TimestampColumnElement, "postgresql")
def compile_timestamp_pg(element: Any, compiler: Any, **kw: Any) -> str:
    """Compile a `_TimestampColumnElement` for PostgreSQL.

    A wrapped column is compiled unchanged. A wrapped literal is formatted
    as an ISO string (space separator, microsecond precision) and emitted as
    a bound string literal prefixed with the ``TIMESTAMP`` keyword.

    Parameters
    ----------
    element : `_TimestampColumnElement`
        Element being compiled; at least one of its ``_column`` and
        ``_literal`` attributes must be set, with ``_column`` taking
        priority.
    compiler
        SQLAlchemy compiler object driving the compilation.
    **kw
        Additional keyword arguments forwarded unchanged to
        ``compiler.process``.

    Returns
    -------
    sql : `str`
        Compiled SQL text.
    """
    assert element._column is not None or element._literal is not None, "Must have column or literal"
    if element._column is not None:
        return compiler.process(element._column, **kw)
    isoString = element._literal.isoformat(sep=" ", timespec="microseconds")
    return "TIMESTAMP " + compiler.process(sqlalchemy.sql.literal(isoString), **kw)

176 

177 

class WhereClauseConverter(ABC):
    """Abstract base for objects that turn nodes of a butler query
    expression tree into SQLAlchemy expression objects.

    Instances are produced and consumed by `WhereClauseConverterVisitor`,
    which in turn is created and used only by `convertExpressionToSql`.
    """

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        """Complete the conversion of this [boolean] expression and return
        the SQLAlchemy expression object.

        Parameters
        ----------
        node : `Node`
            Original expression tree node this converter represents; used
            only for error reporting.

        Returns
        -------
        sql : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy column expression.

        Raises
        ------
        ExpressionTypeError
            Raised if this node does not represent a boolean expression.
            This base implementation always raises; subclasses that can
            actually represent a boolean should override it.
        """
        message = f'Expression "{node}" has type {self.dtype}, not bool.'
        raise ExpressionTypeError(message)

    @property
    @abstractmethod
    def dtype(self) -> type:
        """The Python type of the expression tree node associated with this
        converter (`type`).

        For a literal or bind object this should be its exact type; for
        expressions that map to database entities or expressions it should
        be the type SQLAlchemy's converter mechanism produces when rows are
        returned from the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        """Handle this expression when it is one of the elements on the
        right-hand side of an IN expression.

        Implementations must do exactly one of:

        - append or extend to ``literals``
        - append or extend to ``ranges``
        - raise `ExpressionTypeError`.

        Parameters
        ----------
        literals : `list` [ `sqlalchemy.sql.ColumnElement` ]
            List of SQL expression objects that the left-hand side of the IN
            operation may match exactly.
        ranges : `list` of `tuple`
            List of (start, stop, step) tuples that represent ranges that
            the left-hand side of the IN operation may match.
        dtype : `type`
            Type of the left-hand side operand for the IN expression.
            ``literals`` should only be appended to if
            ``self.dtype is dtype``, and ``ranges`` should only be appended
            to if ``dtype is int``.
        node : `Node`
            Original expression tree node this converter represents; for use
            only in error reporting.

        Raises
        ------
        ExpressionTypeError
            Raised if this node can never appear on the right-hand side of
            an IN expression, or if it is incompatible with the left-hand
            side type.
        """
        raise NotImplementedError()

265 

266 

class ScalarWhereClauseConverter(WhereClauseConverter):
    """The main `WhereClauseConverter` implementation, covering expressions
    that a single `sqlalchemy.sql.ColumnElement` can always represent
    directly.

    Use `fromExpression` or `fromLiteral` to construct instances.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        A SQLAlchemy column expression.
    value
        The Python literal this expression was constructed from, or `None`
        if it was not constructed from a literal. Note that this is also
        `None` when this object corresponds to the literal `None`, in which
        case ``dtype is type(None)``.
    dtype : `type`
        Python type this expression maps to.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, value: Any, dtype: type):
        self.column = column
        self.value = value
        self._dtype = dtype

    @classmethod
    def fromExpression(cls, column: sqlalchemy.sql.ColumnElement, dtype: type) -> ScalarWhereClauseConverter:
        """Wrap an existing SQLAlchemy column expression of a known type.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy column expression.
        dtype : `type`
            Python type this expression maps to.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``column``.
        """
        # No Python literal is associated with a general expression.
        return cls(column, None, dtype)

    @classmethod
    def fromLiteral(cls, value: Any) -> ScalarWhereClauseConverter:
        """Wrap a Python literal.

        Parameters
        ----------
        value
            The Python literal to wrap.

        Returns
        -------
        converter : `ScalarWhereClauseConverter`
            Converter instance that wraps ``value``.
        """
        literalType = type(value)
        if literalType is datetime:
            # datetime literals need dialect-specific rendering.
            return cls(_TimestampColumnElement(literal=value), value, literalType)
        sqlType = ddl.AstropyTimeNsecTai if literalType is Time else None
        return cls(sqlalchemy.sql.literal(value, type_=sqlType), value, literalType)

    def finish(self, node: Node) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if self.dtype is bool:
            return self.column
        # Base class raises; delegating avoids duplicating the error message.
        return super().finish(node)

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return self._dtype

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is self.dtype:
            literals.append(self.column)
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": left hand side has type '
            f"{dtype.__name__}, but item has type {self.dtype.__name__}."
        )

356 

357 

class TimespanWhereClauseConverter(WhereClauseConverter):
    """Implementation of WhereClauseConverter for `Timespan` expressions.

    Parameters
    ----------
    timespan : `TimespanDatabaseRepresentation`
        Object that represents a logical timespan column or column expression
        (which may or may not be backed by multiple real columns).
    """

    def __init__(self, timespan: TimespanDatabaseRepresentation):
        self.timespan = timespan

    @classmethod
    def fromPair(
        cls,
        begin: ScalarWhereClauseConverter,
        end: ScalarWhereClauseConverter,
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ) -> TimespanWhereClauseConverter:
        """Construct from a pair of literal expressions.

        Parameters
        ----------
        begin : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from below).
        end : `ScalarWhereClauseConverter`
            Converter object associated with an expression of type
            `astropy.time.Time` or `None` (for a timespan that is unbounded
            from above).
        TimespanReprClass : `type`; `TimespanDatabaseRepresentation` subclass
            Class that encapsulates the representation of `Timespan` objects
            in the database.

        Returns
        -------
        converter : `TimespanWhereClauseConverter`
            Converter instance that represents a `Timespan` literal.

        Raises
        ------
        ExpressionTypeError
            Raised if begin or end is a time column from the database or
            other time expression, not a literal or bind time value.
        """
        bounds = (begin, end)
        for bound in bounds:
            assert bound.dtype in (Time, type(None)), "Guaranteed by dispatch table rules."
        # A Time-typed converter without a Python value is a column or other
        # expression, which cannot be folded into a Timespan literal.
        if any(bound.value is None and bound.dtype is Time for bound in bounds):
            raise ExpressionTypeError("Time pairs in expressions must be literals or bind values.")
        return cls(TimespanReprClass.fromLiteral(Timespan(begin.value, end.value)))

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return Timespan

    def overlaps(self, other: TimespanWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents the
        overlap of this timespan with another.

        Parameters
        ----------
        other : `TimespanWhereClauseConverter`
            RHS operand for the overlap operation.

        Returns
        -------
        overlaps : `ScalarWhereClauseConverter`
            Converter that wraps the boolean overlaps expression.
        """
        assert other.dtype is Timespan, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.overlaps(other.timespan), bool)

    def contains(self, other: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
        """Construct a boolean converter expression that represents whether
        this timespan contains a scalar time.

        Parameters
        ----------
        other : `ScalarWhereClauseConverter`
            RHS operand for the containment test; must have type
            `astropy.time.Time`.

        Returns
        -------
        contains : `ScalarWhereClauseConverter`
            Converter that wraps the boolean containment expression.
        """
        assert other.dtype is Time, "Guaranteed by dispatch table rules"
        return ScalarWhereClauseConverter.fromExpression(self.timespan.contains(other.column), bool)

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        raise ExpressionTypeError(
            f'Invalid element on right side of IN expression "{node}": '
            "Timespans are not allowed in this context."
        )

464 

465 

class RangeWhereClauseConverter(WhereClauseConverter):
    """`WhereClauseConverter` implementation for integer range literals.

    A range literal is only valid on the right-hand side of an IN operation
    whose left-hand side operand has type `int`.

    Parameters
    ----------
    start : `int`
        Starting point (inclusive) for the range.
    stop : `int`
        Stopping point (exclusive) for the range.
    step : `int`
        Step size for the range.
    """

    def __init__(self, start: int, stop: int, step: int):
        self.start, self.stop, self.step = start, stop, step

    @property
    def dtype(self) -> type:
        # Docstring inherited.
        return range

    def categorizeForIn(
        self,
        literals: List[sqlalchemy.sql.ColumnElement],
        ranges: List[Tuple[int, int, int]],
        dtype: type,
        node: Node,
    ) -> None:
        # Docstring inherited.
        if dtype is int:
            ranges.append((self.start, self.stop, self.step))
            return
        raise ExpressionTypeError(
            f'Error in IN expression "{node}": range expressions '
            f"are only supported for int operands, not {dtype.__name__}."
        )

506 

507 

UnaryFunc = Callable[[WhereClauseConverter], WhereClauseConverter]
"""Signature of the unary-operation callables that `DispatchTable` stores
directly.
"""

BinaryFunc = Callable[[WhereClauseConverter, WhereClauseConverter], WhereClauseConverter]
"""Signature of the binary-operation callables that `DispatchTable` stores
directly.
"""

UnaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Signature of unary-operation callables that operate directly on
SQLAlchemy column expressions.
"""

BinaryColumnFunc = Callable[
    [sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement],
    sqlalchemy.sql.ColumnElement,
]
"""Signature of binary-operation callables that operate directly on
SQLAlchemy column expressions.
"""

# Generic placeholder for "whatever callable type was passed in".
_F = TypeVar("_F")

529 

530 

def adaptIdentity(func: _F, result: Optional[type]) -> _F:
    """No-op adapter for `DispatchTable.registerUnary` and
    `DispatchTable.registerBinary`.

    ``result`` is accepted only so the signature matches the other adapters;
    the given function is handed back untouched.
    """
    return func

536 

537 

def adaptUnaryColumnFunc(func: UnaryColumnFunc, result: type) -> UnaryFunc:
    """Adapter for `DispatchTable.registerUnary` that lifts a
    `UnaryColumnFunc` into a `UnaryFunc`.

    The returned callable requires its operand to be a
    `ScalarWhereClauseConverter`, applies ``func`` to that operand's column,
    and wraps the outcome in a new converter of type ``result``.
    """

    def adapted(operand: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(operand, ScalarWhereClauseConverter)
        column = func(operand.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted

549 

550 

def adaptBinaryColumnFunc(func: BinaryColumnFunc, result: type) -> BinaryFunc:
    """Adapter for `DispatchTable.registerBinary` that lifts a
    `BinaryColumnFunc` into a `BinaryFunc`.

    The returned callable requires both operands to be
    `ScalarWhereClauseConverter` instances, applies ``func`` to their
    columns, and wraps the outcome in a new converter of type ``result``.
    """

    def adapted(lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        column = func(lhs.column, rhs.column)
        return ScalarWhereClauseConverter.fromExpression(column, dtype=result)

    return adapted

563 

564 

class TimeBinaryOperator:
    """Callable implementing a binary operation between two time operands,
    each of which may have type `Time` or `datetime`.

    When the operand types are mixed, `Time` literals are coerced to
    `datetime` before the operator is applied (see `coerceTimes`).

    Parameters
    ----------
    operator : `Callable`
        Function applied to the two SQLAlchemy column expressions of the
        (possibly coerced) operands.
    dtype : `type`
        Python type of the resulting expression.
    """

    def __init__(self, operator: Callable, dtype: type):
        self.operator = operator
        self.dtype = dtype

    def __call__(self, lhs: WhereClauseConverter, rhs: WhereClauseConverter) -> WhereClauseConverter:
        assert isinstance(lhs, ScalarWhereClauseConverter)
        assert isinstance(rhs, ScalarWhereClauseConverter)
        operands = [arg.column for arg in self.coerceTimes(lhs, rhs)]
        return ScalarWhereClauseConverter.fromExpression(self.operator(*operands), dtype=self.dtype)

    @classmethod
    def coerceTimes(cls, *args: ScalarWhereClauseConverter) -> List[ScalarWhereClauseConverter]:
        """Coerce one or more ScalarWhereClauseConverters to datetime type if
        necessary.

        If any of the arguments has `datetime` type then all other arguments
        are converted to `datetime` type as well.

        Parameters
        ----------
        *args : `ScalarWhereClauseConverter`
            Instances which represent time objects, their type can be one of
            `Time` or `datetime`. If coercion happens, then `Time` objects
            can only be literals, not expressions.

        Returns
        -------
        converters : `list` [ `ScalarWhereClauseConverter` ]
            List of converters in the same order as they appear in argument
            list, some of them can be coerced to `datetime` type, non-coerced
            arguments are returned without any change.

        Raises
        ------
        ExpressionTypeError
            Raised if coercion is needed for a `Time` operand that is not a
            literal or bind value.
        """

        def _coerce(arg: ScalarWhereClauseConverter) -> ScalarWhereClauseConverter:
            """Coerce single ScalarWhereClauseConverter to datetime literal."""
            if arg.dtype is datetime:
                return arg
            assert arg.dtype is Time, "Cannot coerce non-Time literals"
            if arg.value is None:
                # A Time column or expression has no Python value to convert.
                # User expressions can reach this, so report it as a type
                # error rather than relying on an assert, which is stripped
                # under ``python -O``.
                raise ExpressionTypeError(
                    "Cannot coerce a Time expression to datetime; Time operands combined "
                    "with datetime operands must be literals or bind values."
                )
            with warnings.catch_warnings():
                # Silence astropy/erfa warnings emitted by the conversion.
                warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
                if erfa is not None:
                    warnings.simplefilter("ignore", category=erfa.ErfaWarning)
                dt = arg.value.to_datetime()
            return ScalarWhereClauseConverter.fromLiteral(dt)

        if any(arg.dtype is datetime for arg in args):
            return [_coerce(arg) for arg in args]
        else:
            return list(args)

616 

617 

618class DispatchTable: 

619 """An object that manages unary- and binary-operator type-dispatch tables 

620 for `WhereClauseConverter`. 

621 

622 Notes 

623 ----- 

624 A lot of the machinery in this class (and in the preceding function 

625 signature type aliases) is all in service of making the actual dispatch 

626 rules in the `build` method concise and easy to read, because that's where 

627 all of the important high-level logic lives. 

628 

629 Double-dispatch is a pain in Python, as it is in most languages; it's worth 

630 noting that I first tried the traditional visitor-pattern approach here, 

631 and it was *definitely* much harder to see the actual behavior. 

632 """ 

633 

634 def __init__(self) -> None: 

635 self._unary: Dict[Tuple[str, type], UnaryFunc] = {} 

636 self._binary: Dict[Tuple[str, type, type], BinaryFunc] = {} 

637 

638 def registerUnary( 

639 self, 

640 operator: str, 

641 operand: Union[type, Iterable[type]], 

642 func: _F, 

643 *, 

644 result: Optional[type] = None, 

645 adapt: Any = True, 

646 ) -> None: 

647 """Register a unary operation for one or more types. 

648 

649 Parameters 

650 ---------- 

651 operator : `str` 

652 Operator as it appears in the string expression language. Unary 

653 operations that are not mapped to operators may use their own 

654 arbitrary strings, as long as these are used consistently in 

655 `build` and `applyUnary`. 

656 operand : `type` or `Iterable` [ `type` ] 

657 Type or types for which this operation is implemented by the given 

658 ``func``. 

659 func : `Callable` 

660 Callable that implements the unary operation. If 

661 ``adapt is True``, this should be a `UnaryColumnFunc`. If 

662 ``adapt is False``, this should be a `UnaryFunc`. Otherwise, 

663 this is whatever type is accepted as the first argument to 

664 ``adapt``. 

665 result : `type`, optional 

666 Type of the expression returned by this operation. If not 

667 provided, the type of the operand is assumed. 

668 adapt : `bool` or `Callable` 

669 A callable that wraps ``func`` (the first argument) and ``result`` 

670 (the second argument), returning a new callable with the 

671 signature of `UnaryFunc`. `True` (default) and `False` invoke a 

672 default adapter or no adapter (see ``func`` docs). 

673 """ 

674 if adapt is True: 

675 adapt = adaptUnaryColumnFunc 

676 elif adapt is False: 

677 adapt = adaptIdentity 

678 for item in ensure_iterable(operand): 

679 self._unary[operator, item] = adapt(func, result if result is not None else item) 

680 

681 def registerBinary( 

682 self, 

683 operator: str, 

684 lhs: Union[type, Iterable[type]], 

685 func: _F, 

686 *, 

687 rhs: Optional[Union[type, Iterable[type]]] = None, 

688 result: Optional[type] = None, 

689 adapt: Any = True, 

690 ) -> None: 

691 """Register a binary operation for one or more types. 

692 

693 Parameters 

694 ---------- 

695 operator : `str` 

696 Operator as it appears in the string expression language. Binary 

697 operations that are not mapped to operators may use their own 

698 arbitrary strings, as long as these are used consistently in 

699 `build` and `applyBinary`. 

700 lhs : `type` or `Iterable` [ `type` ] 

701 Left-hand side type or types for which this operation is 

702 implemented by the given ``func``. 

703 func : `Callable` 

704 Callable that implements the binary operation. If 

705 ``adapt is True``, this should be a `BinaryColumnFunc`. If 

706 ``adapt is False``, this should be a `BinaryFunc`. Otherwise, 

707 this is whatever type is accepted as the first argument to 

708 ``adapt``. 

709 rhs : `type` or `Iterable` [ `type` ] 

710 Right-hand side type or types for which this operation is 

711 implemented by the given ``func``. If multiple types, all 

712 combinations of ``lhs`` and ``rhs`` are registered. If not 

713 provided, each element of ``lhs`` is assumed to be paired with 

714 itself, but mixed-type combinations are not registered. 

715 result : `type`, optional 

716 Type of the expression returned by this operation. If not 

717 provided and ``rhs`` is also not provided, the type of the operand 

718 (``lhs``) is assumed. If not provided and ``rhs`` *is* provided, 

719 then ``result=None`` will be forwarded to ``adapt``. 

720 adapt : `bool` or `Callable` 

721 A callable that wraps ``func`` (the first argument) and ``result`` 

722 (the second argument), returning a new callable with the 

723 signature of `BinaryFunc`. `True` (default) and `False` invoke a 

724 default adapter or no adapter (see ``func`` docs). 

725 """ 

726 if adapt is True: 

727 adapt = adaptBinaryColumnFunc 

728 elif adapt is False: 

729 adapt = adaptIdentity 

730 for lh in ensure_iterable(lhs): 

731 if rhs is None: 

732 self._binary[operator, lh, lh] = adapt(func, result if result is not None else lh) 

733 else: 

734 for rh in ensure_iterable(rhs): 

735 self._binary[operator, lh, rh] = adapt(func, result) 

736 

737 def applyUnary( 

738 self, 

739 operator: str, 

740 operand: WhereClauseConverter, 

741 ) -> WhereClauseConverter: 

742 """Look up and apply the appropriate function for a registered unary 

743 operation. 

744 

745 Parameters 

746 ---------- 

747 operator : `str` 

748 Operator for the operation to apply. 

749 operand : `WhereClauseConverter` 

750 Operand, with ``operand.dtype`` and ``operator`` used to look up 

751 the appropriate function. 

752 

753 Returns 

754 ------- 

755 expression : `WhereClauseConverter` 

756 Converter instance that represents the operation, created by 

757 calling the registered function. 

758 

759 Raises 

760 ------ 

761 KeyError 

762 Raised if the operator and operand type combination is not 

763 recognized. 

764 """ 

765 return self._unary[operator, operand.dtype](operand) 

766 

767 def applyBinary( 

768 self, 

769 operator: str, 

770 lhs: WhereClauseConverter, 

771 rhs: WhereClauseConverter, 

772 ) -> WhereClauseConverter: 

773 """Look up and apply the appropriate function for a registered binary 

774 operation. 

775 

776 Parameters 

777 ---------- 

778 operator : `str` 

779 Operator for the operation to apply. 

780 lhs : `WhereClauseConverter` 

781 Left-hand side operand. 

782 rhs : `WhereClauseConverter` 

783 Right-hand side operand. 

784 

785 Returns 

786 ------- 

787 expression : `WhereClauseConverter` 

788 Converter instance that represents the operation, created by 

789 calling the registered function. 

790 

791 Raises 

792 ------ 

793 KeyError 

794 Raised if the operator and operand type combination is not 

795 recognized. 

796 """ 

797 return self._binary[operator, lhs.dtype, rhs.dtype](lhs, rhs) 

798 

799 @classmethod 

800 def build(cls, TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> DispatchTable: 

801 table = DispatchTable() 

802 # Standard scalar unary and binary operators: just delegate to 

803 # SQLAlchemy operators. 

804 table.registerUnary("NOT", bool, sqlalchemy.sql.not_) 

805 table.registerUnary("+", (int, float), operator.__pos__) 

806 table.registerUnary("-", (int, float), operator.__neg__) 

807 table.registerBinary("AND", bool, sqlalchemy.sql.and_) 

808 table.registerBinary("OR", bool, sqlalchemy.sql.or_) 

809 table.registerBinary("=", (int, float, str), operator.__eq__, result=bool) 

810 table.registerBinary("!=", (int, float, str), operator.__ne__, result=bool) 

811 table.registerBinary("<", (int, float, str), operator.__lt__, result=bool) 

812 table.registerBinary(">", (int, float, str), operator.__gt__, result=bool) 

813 table.registerBinary("<=", (int, float, str), operator.__le__, result=bool) 

814 table.registerBinary(">=", (int, float, str), operator.__ge__, result=bool) 

815 table.registerBinary("+", (int, float), operator.__add__) 

816 table.registerBinary("-", (int, float), operator.__sub__) 

817 table.registerBinary("*", (int, float), operator.__mul__) 

818 table.registerBinary("/", (int, float), operator.__truediv__) 

819 table.registerBinary("%", (int, float), operator.__mod__) 

820 table.registerBinary( 

821 "=", 

822 (Time, datetime), 

823 TimeBinaryOperator(operator.__eq__, bool), 

824 rhs=(Time, datetime), 

825 adapt=False, 

826 ) 

827 table.registerBinary( 

828 "!=", 

829 (Time, datetime), 

830 TimeBinaryOperator(operator.__ne__, bool), 

831 rhs=(Time, datetime), 

832 adapt=False, 

833 ) 

834 table.registerBinary( 

835 "<", 

836 (Time, datetime), 

837 TimeBinaryOperator(operator.__lt__, bool), 

838 rhs=(Time, datetime), 

839 adapt=False, 

840 ) 

841 table.registerBinary( 

842 ">", 

843 (Time, datetime), 

844 TimeBinaryOperator(operator.__gt__, bool), 

845 rhs=(Time, datetime), 

846 adapt=False, 

847 ) 

848 table.registerBinary( 

849 "<=", 

850 (Time, datetime), 

851 TimeBinaryOperator(operator.__le__, bool), 

852 rhs=(Time, datetime), 

853 adapt=False, 

854 ) 

855 table.registerBinary( 

856 ">=", 

857 (Time, datetime), 

858 TimeBinaryOperator(operator.__ge__, bool), 

859 rhs=(Time, datetime), 

860 adapt=False, 

861 ) 

862 table.registerBinary( 

863 "=", 

864 lhs=(int, float, str, Time, type(None)), 

865 rhs=(type(None),), 

866 func=sqlalchemy.sql.expression.ColumnOperators.is_, 

867 result=bool, 

868 ) 

869 table.registerBinary( 

870 "=", 

871 lhs=(type(None),), 

872 rhs=(int, float, str, Time, type(None)), 

873 func=sqlalchemy.sql.expression.ColumnOperators.is_, 

874 result=bool, 

875 ) 

876 table.registerBinary( 

877 "!=", 

878 lhs=(int, float, str, Time, type(None)), 

879 rhs=(type(None),), 

880 func=sqlalchemy.sql.expression.ColumnOperators.is_not, 

881 result=bool, 

882 ) 

883 table.registerBinary( 

884 "!=", 

885 lhs=(type(None),), 

886 rhs=(int, float, str, Time, type(None)), 

887 func=sqlalchemy.sql.expression.ColumnOperators.is_not, 

888 result=bool, 

889 ) 

890 # Construct Timespan literals from 2-element tuples (A, B), where A and 

891 # B are each either Time or None. 

892 table.registerBinary( 

893 "PAIR", 

894 lhs=(Time, type(None)), 

895 rhs=(Time, type(None)), 

896 func=lambda lhs, rhs: TimespanWhereClauseConverter.fromPair(lhs, rhs, TimespanReprClass), 

897 adapt=False, 

898 ) 

899 # Less-than and greater-than between Timespans. 

900 table.registerBinary( 

901 "<", 

902 lhs=Timespan, 

903 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.timespan, dtype=bool), 

904 adapt=False, 

905 ) 

906 table.registerBinary( 

907 ">", 

908 lhs=Timespan, 

909 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.timespan, dtype=bool), 

910 adapt=False, 

911 ) 

912 # Less-than and greater-than between Timespans and Times. 

913 table.registerBinary( 

914 "<", 

915 lhs=Timespan, 

916 rhs=Time, 

917 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan < b.column, dtype=bool), 

918 adapt=False, 

919 ) 

920 table.registerBinary( 

921 ">", 

922 lhs=Timespan, 

923 rhs=Time, 

924 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(a.timespan > b.column, dtype=bool), 

925 adapt=False, 

926 ) 

927 table.registerBinary( 

928 "<", 

929 lhs=Time, 

930 rhs=Timespan, 

931 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan > a.column, dtype=bool), 

932 adapt=False, 

933 ) 

934 table.registerBinary( 

935 ">", 

936 lhs=Time, 

937 rhs=Timespan, 

938 func=lambda a, b: ScalarWhereClauseConverter.fromExpression(b.timespan < a.column, dtype=bool), 

939 adapt=False, 

940 ) 

941 # OVERLAPS operator between Timespans. 

942 table.registerBinary( 

943 "OVERLAPS", 

944 lhs=Timespan, 

945 func=TimespanWhereClauseConverter.overlaps, 

946 adapt=False, 

947 ) 

948 # OVERLAPS operator between Timespans and Time is equivalent to 

949 # "contains", but expression language only has OVERLAPS to keep it 

950 # simple. 

951 table.registerBinary( 

952 "OVERLAPS", 

953 lhs=Timespan, 

954 rhs=Time, 

955 func=TimespanWhereClauseConverter.contains, 

956 adapt=False, 

957 ) 

958 table.registerBinary( 

959 "OVERLAPS", 

960 lhs=Time, 

961 rhs=Timespan, 

962 func=lambda a, b: TimespanWhereClauseConverter.contains(b, a), 

963 adapt=False, 

964 ) 

965 return table 

966 

967 

class WhereClauseConverterVisitor(TreeVisitor[WhereClauseConverter]):
    """Implements TreeVisitor to convert the tree into
    `WhereClauseConverter` objects.

    This class should be used only by the `convertExpressionToSql` function;
    external code should just call that function.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    columns : `QueryColumns`
        Struct that organizes the special columns known to the query
        under construction.
    elements : `NamedKeyMapping`
        `DimensionElement` instances and their associated tables.
    bind : `Mapping`
        Mapping from string names to literal values that should be substituted
        for those names when they appear (as identifiers) in the expression.
    TimespanReprClass : `type`; subclass of `TimespanDatabaseRepresentation`
        Class that encapsulates the representation of `Timespan` objects in
        the database.
    """

    def __init__(
        self,
        universe: DimensionUniverse,
        columns: QueryColumns,
        elements: NamedKeyMapping[DimensionElement, sqlalchemy.sql.FromClause],
        bind: Mapping[str, Any],
        TimespanReprClass: Type[TimespanDatabaseRepresentation],
    ):
        self.universe = universe
        self.columns = columns
        self.elements = elements
        self.bind = bind
        self._TimespanReprClass = TimespanReprClass
        # Operator dispatch table, built for this timespan representation.
        self._dispatch = DispatchTable.build(TimespanReprClass)

    def visitNumericLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # Convert string value into float or int; int is tried first so that
        # integer literals keep an integer type.
        coerced: Union[int, float]
        try:
            coerced = int(value)
        except ValueError:
            coerced = float(value)
        return ScalarWhereClauseConverter.fromLiteral(coerced)

    def visitStringLiteral(self, value: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitTimeLiteral(self, value: Time, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return ScalarWhereClauseConverter.fromLiteral(value)

    def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIdentifier
        #
        # Resolution order: bind values first, then special constants, and
        # finally dimension elements / dimension element columns.
        if name in self.bind:
            value = self.bind[name]
            if isinstance(value, Timespan):
                return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
            return ScalarWhereClauseConverter.fromLiteral(value)
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            assert self.columns.datasets is not None
            assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
            return ScalarWhereClauseConverter.fromExpression(
                _TimestampColumnElement(column=self.columns.datasets.ingestDate),
                datetime,
            )
        elif constant is ExpressionConstant.NULL:
            return ScalarWhereClauseConverter.fromLiteral(None)
        assert constant is None, "Check for enum values should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is not None:
            if column == TimespanDatabaseRepresentation.NAME:
                if element.temporal is None:
                    raise ExpressionTypeError(
                        f"No timespan column exists for non-temporal element '{element.name}'."
                    )
                return TimespanWhereClauseConverter(self.columns.timespans[element])
            else:
                if column not in element.RecordClass.fields.standard.names:
                    raise ExpressionTypeError(f"No column '{column}' in dimension table '{element.name}'.")
                return ScalarWhereClauseConverter.fromExpression(
                    self.elements[element].columns[column],
                    element.RecordClass.fields.standard[column].getPythonType(),
                )
        else:
            # No column given: the identifier refers to the dimension's key.
            assert isinstance(element, Dimension)
            return ScalarWhereClauseConverter.fromExpression(
                self.columns.getKeyColumn(element), element.primaryKey.getPythonType()
            )

    def visitUnaryOp(self, operator: str, operand: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        try:
            return self._dispatch.applyUnary(operator, operand)
        except KeyError:
            # The dispatch-table KeyError is an implementation detail; hide
            # it from the user-facing traceback.
            raise ExpressionTypeError(
                f'Invalid operand of type {operand.dtype} for unary operator {operator} in "{node}".'
            ) from None

    def visitBinaryOp(
        self, operator: str, lhs: WhereClauseConverter, rhs: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        try:
            return self._dispatch.applyBinary(operator, lhs, rhs)
        except KeyError:
            raise ExpressionTypeError(
                f"Invalid operand types ({lhs.dtype}, {rhs.dtype}) for binary "
                f'operator {operator} in "{node}".'
            ) from None

    def visitIsIn(
        self,
        lhs: WhereClauseConverter,
        values: List[WhereClauseConverter],
        not_in: bool,
        node: Node,
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitIsIn
        if not isinstance(lhs, ScalarWhereClauseConverter):
            raise ExpressionTypeError(f'Invalid LHS operand of type {lhs.dtype} for IN operator in "{node}".')
        # `values` is a list of literals and ranges, range is represented
        # by a tuple (start, stop, stride). We need to transform range into
        # some SQL construct, simplest would be to generate a set of literals
        # and add it to the same list but it could become too long. What we
        # do here is to introduce some large limit on the total number of
        # items in IN() and if range exceeds that limit then we do something
        # like:
        #
        #    X IN (1, 2, 3)
        #    OR
        #    (X BETWEEN START AND STOP AND MOD(X, STRIDE) = MOD(START, STRIDE))
        #
        # or for NOT IN case
        #
        #    NOT (X IN (1, 2, 3)
        #         OR
        #         (X BETWEEN START AND STOP
        #          AND MOD(X, STRIDE) = MOD(START, STRIDE)))
        #
        max_in_items = 1000
        clauses: List[sqlalchemy.sql.ColumnElement] = []
        # Split the list into literals and ranges
        literals: List[sqlalchemy.sql.ColumnElement] = []
        ranges: List[Tuple[int, int, int]] = []
        for value in values:
            value.categorizeForIn(literals, ranges, lhs.dtype, node)
        # Handle ranges (maybe by converting them to literals).
        for start, stop, stride in ranges:
            # Exact number of values in the inclusive range.  A plain
            # ``(stop - start + 1) // stride`` can undercount by one when
            # stride > 1, which would let the IN() list exceed the limit.
            count = len(range(start, stop + 1, stride))
            if len(literals) + count > max_in_items:
                # X BETWEEN START AND STOP
                #    AND MOD(X, STRIDE) = MOD(START, STRIDE)
                expr = lhs.column.between(start, stop)
                if stride != 1:
                    expr = sqlalchemy.sql.and_(expr, (lhs.column % stride) == (start % stride))
                clauses.append(expr)
            else:
                # add all values to literal list, stop is inclusive
                literals += [sqlalchemy.sql.literal(value) for value in range(start, stop + 1, stride)]
        # Handle literals.
        if literals:
            # add IN() in front of BETWEENs
            clauses.insert(0, lhs.column.in_(literals))
        # Assemble the full expression.
        expr = sqlalchemy.sql.or_(*clauses)
        if not_in:
            expr = sqlalchemy.sql.not_(expr)
        return ScalarWhereClauseConverter.fromExpression(expr, bool)

    def visitParens(self, expression: WhereClauseConverter, node: Node) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[WhereClauseConverter, ...], node: Node) -> WhereClauseConverter:
        # Docstring inherited from base class
        #
        # Only 2-element tuples are accepted; they are dispatched as the
        # special "PAIR" binary operator.
        if len(items) != 2:
            raise ExpressionTypeError(f'Unrecognized {len(items)}-element tuple "{node}".')
        try:
            return self._dispatch.applyBinary("PAIR", items[0], items[1])
        except KeyError:
            # Suppress the internal KeyError, consistent with visitUnaryOp
            # and visitBinaryOp.
            raise ExpressionTypeError(
                f'Invalid type(s) ({items[0].dtype}, {items[1].dtype}) in timespan tuple "{node}" '
                '(Note that date/time strings must be preceded by "T" to be recognized).'
            ) from None

    def visitRangeLiteral(
        self, start: int, stop: int, stride: Optional[int], node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        # stride can be None which means the same as 1.
        return RangeWhereClauseConverter(start, stop, stride or 1)

    def visitPointNode(
        self, ra: WhereClauseConverter, dec: WhereClauseConverter, node: Node
    ) -> WhereClauseConverter:
        # Docstring inherited from base class

        # this is a placeholder for future extension, we enabled syntax but
        # do not support actual use just yet.
        raise NotImplementedError("POINT() function is not supported yet")