Coverage for python/lsst/daf/butler/queries/expression_factory.py: 51%

186 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 02:03 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable 

34from typing import TYPE_CHECKING 

35 

36from lsst.sphgeom import Region 

37 

38from ..dimensions import Dimension, DimensionElement, DimensionUniverse 

39from . import tree 

40 

41if TYPE_CHECKING: 

42 from .._timespan import Timespan 

43 from ._query import Query 

44 

45# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression, 

46# but it just returns OrderExpression and Predicate objects directly, because 

47# we don't need to overload any operators or define any methods on those. 

48 

49 

class ExpressionProxy(ABC):
    """Abstract wrapper around a column expression.

    Concrete subclasses overload operators so that applying them yields new
    expression-tree objects rather than ordinary Python values.
    """

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        # Concrete proxies return the expression-tree node they stand for.
        raise NotImplementedError()

    def __repr__(self) -> str:
        backing = self._expression
        return str(backing)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that is true when this expression is NULL."""
        backing = self._expression
        return tree.Predicate.is_null(backing)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Anything that is not already a proxy is handed to
        # ``tree.make_column_literal``; proxies are simply unwrapped.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # Left operand is always this proxy; the right operand may be a proxy
        # or a plain value.
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

77 

78 

class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` for columns that hold a single simple value.

    Comparison operators produce `tree.Predicate` objects and arithmetic
    operators produce new scalar proxies.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression requesting a reversed sort on this
        expression.
        """
        operand = self._expression
        return tree.Reversed(operand=operand)

    # Rich comparisons: each one delegates to ``_make_comparison`` with the
    # matching operator string and returns a predicate, not a `bool`.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # Arithmetic: forward overloads put this proxy on the left, reflected
    # overloads put it on the right.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Build a predicate testing whether this expression falls within a
        literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) for the slice.
        stop : `int` or `None`, optional
            Upper bound (exclusive) for the slice, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        return tree.Predicate.in_range(member, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Build a predicate testing whether this expression evaluates to a
        value contained in an iterable of other expressions.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` or values to be interpreted as
            literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        # Every item is normalized to a column expression before the
        # container predicate is built.
        candidates = list(map(self._make_expression, others))
        return tree.Predicate.in_container(member, candidates)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Build a predicate testing whether this expression evaluates to a
        value that appears in a single-column selection from another query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        member = self._expression
        selected = column._expression
        return tree.Predicate.in_query(member, selected, query._tree)

215 

216 

class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that simply holds a concrete expression.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        # Stored privately and exposed through the ``_expression`` property
        # that the base classes read.
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        wrapped = self._expr
        return wrapped

232 

233 

class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` for timespan-valued columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Build a predicate that tests whether this timespan overlaps
        another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

279 

280 

class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region-valued columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression) -> None:
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a predicate that tests whether this region overlaps another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

312 

313 

class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    non-dimension fields of the element's records.

    The proxy itself can only be used as a column expression when the wrapped
    element is a `Dimension`; otherwise accessing its backing expression
    raises `TypeError`.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        else:
            raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # __getattr__ only runs after normal lookup has failed, so a request
        # for ``_element`` here means the instance was never initialized
        # (e.g. it was created by copy/pickle machinery via ``__new__``).
        # Reading ``self._element`` below would then re-enter this method
        # forever, so fail fast with the conventional AttributeError.
        if field == "_element":
            raise AttributeError(field)
        if field in self._element.schema.dimensions.names:
            if field not in self._element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(self._element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=self._element, field=field)
        except tree.InvalidQueryError:
            # The internal validation error is an implementation detail;
            # callers of getattr() only need to see AttributeError.
            raise AttributeError(field) from None
        return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """A proxy for this element's ``region`` field.

        Raises `AttributeError` if a ``region`` field reference cannot be
        constructed for the element.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except tree.InvalidQueryError:
            raise AttributeError("region") from None
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """A proxy for this element's ``timespan`` field.

        Raises `AttributeError` if a ``timespan`` field reference cannot be
        constructed for the element.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except tree.InvalidQueryError:
            raise AttributeError("timespan") from None
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry != "timespan" and entry != "region"]
        result.extend(self._element.schema.names)
        return result

376 

377 

class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        if field not in tree.DATASET_FIELD_NAMES:
            raise AttributeError(field)
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """A proxy for the ``timespan`` field of this dataset type.

        This is an explicit property (rather than being left to
        ``__getattr__``) so that the result is a `TimespanProxy` instead of
        the scalar proxy ``__getattr__`` returns for every other field.
        """
        # A dataset type has no dimension element, so the reference must be a
        # dataset field reference built from the dataset type name.
        # NOTE(review): assumes "timespan" is accepted by
        # tree.DatasetFieldReference, as __dir__ below implies - confirm.
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        result = list(super().__dir__())
        # "timespan" will be added by delegation to super() and we don't want
        # it to appear twice.
        result.extend(name for name in tree.DATASET_FIELD_NAMES if name != "timespan")
        return result

423 

424 

425class ExpressionFactory: 

426 """A factory for creating column expressions that uses operator overloading 

427 to form a mini-language. 

428 

429 Instances of this class are usually obtained from 

430 `Query.expression_factory`; see that property's documentation for more 

431 information. 

432 

433 Parameters 

434 ---------- 

435 universe : `DimensionUniverse` 

436 Object that describes all dimensions. 

437 """ 

438 

439 def __init__(self, universe: DimensionUniverse): 

440 self._universe = universe 

441 

442 def __getattr__(self, name: str) -> DimensionElementProxy: 

443 try: 

444 element = self._universe.elements[name] 

445 except KeyError: 

446 raise AttributeError(name) 

447 return DimensionElementProxy(element) 

448 

449 def __getitem__(self, name: str) -> DatasetTypeProxy: 

450 return DatasetTypeProxy(name) 

451 

452 def not_(self, operand: tree.Predicate) -> tree.Predicate: 

453 """Apply a logical NOT operation to a boolean expression. 

454 

455 Parameters 

456 ---------- 

457 operand : `tree.Predicate` 

458 Expression to invetree. 

459 

460 Returns 

461 ------- 

462 logical_not : `tree.Predicate` 

463 A boolean expression that evaluates to the opposite of ``operand``. 

464 """ 

465 return operand.logical_not() 

466 

467 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

468 """Combine a sequence of boolean expressions with logical AND. 

469 

470 Parameters 

471 ---------- 

472 first : `tree.Predicate` 

473 First operand (required). 

474 *args 

475 Additional operands. 

476 

477 Returns 

478 ------- 

479 logical_and : `tree.Predicate` 

480 A boolean expression that evaluates to `True` only if all operands 

481 evaluate to `True. 

482 """ 

483 return first.logical_and(*args) 

484 

485 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

486 """Combine a sequence of boolean expressions with logical OR. 

487 

488 Parameters 

489 ---------- 

490 first : `tree.Predicate` 

491 First operand (required). 

492 *args 

493 Additional operands. 

494 

495 Returns 

496 ------- 

497 logical_or : `tree.Predicate` 

498 A boolean expression that evaluates to `True` if any operand 

499 evaluates to `True. 

500 """ 

501 return first.logical_or(*args) 

502 

503 @staticmethod 

504 def literal(value: object) -> ExpressionProxy: 

505 """Return an expression proxy that represents a literal value. 

506 

507 Expression proxy objects obtained from this factory can generally be 

508 compared directly to literals, so calling this method directly in user 

509 code should rarely be necessary. 

510 

511 Parameters 

512 ---------- 

513 value : `object` 

514 Value to include as a literal in an expression tree. 

515 

516 Returns 

517 ------- 

518 expression : `ExpressionProxy` 

519 Expression wrapper for this literal. 

520 """ 

521 expression = tree.make_column_literal(value) 

522 match expression.expression_type: 

523 case "timespan": 

524 return TimespanProxy(expression) 

525 case "region": 

526 return RegionProxy(expression) 

527 case "bool": 

528 raise NotImplementedError("Boolean literals are not supported.") 

529 case _: 

530 return ResolvedScalarExpressionProxy(expression) 

531 

532 @staticmethod 

533 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression: 

534 """Return the column expression object that backs a proxy. 

535 

536 Parameters 

537 ---------- 

538 proxy : `ExpressionProxy` 

539 Proxy constructed via an `ExpressionFactory`. 

540 

541 Returns 

542 ------- 

543 expression : `tree.ColumnExpression` 

544 Underlying column expression object. 

545 """ 

546 return proxy._expression