Coverage for python / lsst / daf / butler / queries / expression_factory.py: 44%

208 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-17 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ExpressionFactory", "ExpressionProxy", "RegionProxy", "ScalarExpressionProxy", "TimespanProxy") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable 

34from typing import TYPE_CHECKING 

35 

36import astropy.time 

37 

38from lsst.sphgeom import Region 

39 

40from .._exceptions import InvalidQueryError 

41from ..dimensions import Dimension, DimensionElement, DimensionUniverse 

42from . import tree 

43 

44if TYPE_CHECKING: 

45 from .._timespan import Timespan 

46 from ._query import Query 

47 

48# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression, 

49# but it just returns OrderExpression and Predicate objects directly, because 

50# we don't need to overload any operators or define any methods on those. 

51 

52 

class ExpressionProxy(ABC):
    """Abstract base for objects that wrap a column expression.

    Concrete subclasses provide the wrapped expression through the
    ``_expression`` property.  The helpers defined here unwrap proxies,
    turn other values into literals, and build comparison predicates.
    """

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        # The column expression backing this proxy; must be overridden.
        raise NotImplementedError()

    def __repr__(self) -> str:
        wrapped = self._expression
        return str(wrapped)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that tests whether this expression is NULL."""
        wrapped = self._expression
        return tree.Predicate.is_null(wrapped)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Proxies are unwrapped to their backing expression; any other value
        # is handed to `tree.make_column_literal`.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # This proxy is always the left-hand operand (``a``); ``other`` is
        # converted and used as the right-hand operand (``b``).
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

80 

81 

class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for simple single-value columns.

    Comparison operators return `tree.Predicate` objects, arithmetic
    operators return new `ScalarExpressionProxy` objects, and the ``in_*``
    and ``glob`` methods build membership and pattern predicates.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression that indicates that the sort on this
        expression should be reversed.
        """
        operand = self._expression
        return tree.Reversed(operand=operand)

    def as_boolean(self) -> tree.Predicate:
        """If this scalar expression is a boolean, convert it to a
        `Predicate` so it can be used as a boolean expression.

        This implementation always raises; subclasses that wrap boolean
        columns override it.

        Returns
        -------
        predicate : `Predicate`
            This expression converted to a `Predicate`.

        Raises
        ------
        InvalidQueryError
            If this expression is not a boolean.
        """
        expr = self._expression
        message = (
            f"Expression '{expr}' with type"
            f" '{expr.column_type}' can't be used directly as a boolean value."
            " Use a comparison operator like '>' or '==' instead."
        )
        raise InvalidQueryError(message)

    # Comparison operators: each one builds a predicate with this proxy on
    # the left-hand side.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # Arithmetic operators: the reflected variants place ``other`` on the
    # left-hand side of the resulting binary expression.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression is
        within a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) for the slice.
        stop : `int` or `None`, optional
            Upper bound (exclusive) for the slice, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        return tree.Predicate.in_range(target, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in an iterable of other expressions.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` or values to be interpreted as
            literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        # Each member is unwrapped (proxy) or converted to a literal (value).
        members = [self._make_expression(member) for member in others]
        return tree.Predicate.in_container(target, members)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in a single-column selection from
        another query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        selected = column._expression
        return tree.Predicate.in_query(target, selected, query._tree)

    def glob(self, pattern: str) -> tree.Predicate:
        """Return a boolean expression that matches this expression against
        a pattern.

        Parameters
        ----------
        pattern : `str`
            Pattern to use for matching.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(pattern, operator="glob")

255 

256 

class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that simply holds the expression it was
    constructed with.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        # Stored as-is; exposed through the ``_expression`` property below.
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Return exactly the object passed to the constructor.
        return self._expr

272 

273 

class BooleanScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` representing a boolean column.

    Call `as_boolean()` on this object to convert it to a `Predicate`
    before attempting to use it; the backing expression is deliberately not
    available through ``_expression``.

    Parameters
    ----------
    expression : `.tree.ColumnReference`
        Boolean column reference that backs this proxy.

    Raises
    ------
    ValueError
        Raised if ``expression.column_type`` is not ``"bool"``.
    """

    # This is a hack/work-around to make static typing work when referencing
    # dimension record metadata boolean columns.  From the perspective of
    # typing, anything boolean should be a `Predicate`, but the type system
    # has no way of knowing whether a given column is a bool or some other
    # type.

    def __init__(self, expression: tree.ColumnReference) -> None:
        column_type = expression.column_type
        if column_type != "bool":
            raise ValueError(f"Expression is a {column_type}, not a 'bool': {expression}")
        self._boolean_expression = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Always raises: inherited operators that read ``_expression`` must
        # not work on a boolean column.
        message = (
            f"Boolean expression '{self._boolean_expression}' can't be used directly in other expressions."
            " Call the 'as_boolean()' method to convert it to a Predicate instead."
        )
        raise InvalidQueryError(message)

    @property
    def is_null(self) -> tree.Predicate:
        # ``_expression`` raises here, so delegate to a proxy that exposes
        # the boolean column directly.
        delegate = ResolvedScalarExpressionProxy(self._boolean_expression)
        return delegate.is_null

    def as_boolean(self) -> tree.Predicate:
        column = self._boolean_expression
        return tree.Predicate.from_bool_expression(column)

308 

309 

class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for timespan columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan | astropy.time.Time) -> tree.Predicate:
        """Return a boolean expression representing an overlap test between
        this timespan and another timespan or a datetime.

        Parameters
        ----------
        other : `TimespanProxy`, `Timespan`, or `astropy.time.Time`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

355 

356 

class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for region columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression that backs this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Return a boolean expression representing an overlap test between
        this region and another.

        Parameters
        ----------
        other : `RegionProxy` or `lsst.sphgeom.Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

388 

389 

class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    non-dimension fields of the element's records.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Only a `Dimension` can stand in for a column (its key); any other
        # element is usable only through its attributes.
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # ``__getattr__`` only runs after normal lookup has failed.  Read
        # ``_element`` from the instance dict so that an instance created
        # without ``__init__`` (as `copy` and `pickle` do while probing for
        # ``__setstate__``) raises `AttributeError` instead of re-entering
        # this method until `RecursionError`.
        try:
            element = self.__dict__["_element"]
        except KeyError:
            raise AttributeError(field) from None
        if field in element.schema.dimensions.names:
            if field not in element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=element, field=field)
        except InvalidQueryError:
            raise AttributeError(field) from None
        if expression.column_type == "bool":
            return BooleanScalarExpressionProxy(expression)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """Proxy for this element's ``region`` field.

        Raises `AttributeError` if building the field reference raises
        `InvalidQueryError`.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except InvalidQueryError:
            # Suppress the internal error, matching ``timespan`` and
            # ``__getattr__``.
            raise AttributeError("region") from None
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """Proxy for this element's ``timespan`` field.

        Raises `AttributeError` if building the field reference raises
        `InvalidQueryError`.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except InvalidQueryError:
            raise AttributeError("timespan") from None
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry not in ("timespan", "region")]
        result.extend(self._element.schema.names)
        return result

455 

456 

class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        if tree.is_dataset_field(field):
            reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
            return ResolvedScalarExpressionProxy(reference)
        raise AttributeError(field)

    @property
    def timespan(self) -> TimespanProxy:
        """Proxy for the ``timespan`` field of this dataset type."""
        try:
            reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        except InvalidQueryError:
            raise AttributeError("timespan") from None
        return TimespanProxy(reference)

    def __dir__(self) -> list[str]:
        names = list(super().__dir__())
        # "timespan" is a real property, so super() already reports it;
        # filter it out here so it does not appear twice.
        names += [name for name in tree.DATASET_FIELD_NAMES if name != "timespan"]
        return names

502 

503 

504class ExpressionFactory: 

505 """A factory for creating column expressions that uses operator overloading 

506 to form a mini-language. 

507 

508 Instances of this class are usually obtained from 

509 `Query.expression_factory`; see that property's documentation for more 

510 information. 

511 

512 Parameters 

513 ---------- 

514 universe : `DimensionUniverse` 

515 Object that describes all dimensions. 

516 """ 

517 

518 def __init__(self, universe: DimensionUniverse): 

519 self._universe = universe 

520 

521 def __getattr__(self, name: str) -> DimensionElementProxy: 

522 try: 

523 element = self._universe.elements[name] 

524 except KeyError: 

525 raise AttributeError(name) 

526 return DimensionElementProxy(element) 

527 

528 def __getitem__(self, name: str) -> DatasetTypeProxy: 

529 return DatasetTypeProxy(name) 

530 

531 def not_(self, operand: tree.Predicate) -> tree.Predicate: 

532 """Apply a logical NOT operation to a boolean expression. 

533 

534 Parameters 

535 ---------- 

536 operand : `tree.Predicate` 

537 Expression to invetree. 

538 

539 Returns 

540 ------- 

541 logical_not : `tree.Predicate` 

542 A boolean expression that evaluates to the opposite of ``operand``. 

543 """ 

544 return operand.logical_not() 

545 

546 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

547 """Combine a sequence of boolean expressions with logical AND. 

548 

549 Parameters 

550 ---------- 

551 first : `tree.Predicate` 

552 First operand (required). 

553 *args 

554 Additional operands. 

555 

556 Returns 

557 ------- 

558 logical_and : `tree.Predicate` 

559 A boolean expression that evaluates to `True` only if all operands 

560 evaluate to `True`. 

561 """ 

562 return first.logical_and(*args) 

563 

564 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

565 """Combine a sequence of boolean expressions with logical OR. 

566 

567 Parameters 

568 ---------- 

569 first : `tree.Predicate` 

570 First operand (required). 

571 *args 

572 Additional operands. 

573 

574 Returns 

575 ------- 

576 logical_or : `tree.Predicate` 

577 A boolean expression that evaluates to `True` if any operand 

578 evaluates to `True`. 

579 """ 

580 return first.logical_or(*args) 

581 

582 @staticmethod 

583 def literal(value: object) -> ExpressionProxy: 

584 """Return an expression proxy that represents a literal value. 

585 

586 Expression proxy objects obtained from this factory can generally be 

587 compared directly to literals, so calling this method directly in user 

588 code should rarely be necessary. 

589 

590 Parameters 

591 ---------- 

592 value : `object` 

593 Value to include as a literal in an expression tree. 

594 

595 Returns 

596 ------- 

597 expression : `ExpressionProxy` 

598 Expression wrapper for this literal. 

599 """ 

600 expression = tree.make_column_literal(value) 

601 match expression.expression_type: 

602 case "timespan": 

603 return TimespanProxy(expression) 

604 case "region": 

605 return RegionProxy(expression) 

606 case "bool": 

607 raise NotImplementedError("Boolean literals are not supported.") 

608 case _: 

609 return ResolvedScalarExpressionProxy(expression) 

610 

611 @staticmethod 

612 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression: 

613 """Return the column expression object that backs a proxy. 

614 

615 Parameters 

616 ---------- 

617 proxy : `ExpressionProxy` 

618 Proxy constructed via an `ExpressionFactory`. 

619 

620 Returns 

621 ------- 

622 expression : `tree.ColumnExpression` 

623 Underlying column expression object. 

624 """ 

625 return proxy._expression