Coverage for python/lsst/daf/butler/queries/expression_factory.py: 51%

187 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 02:46 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable 

34from typing import TYPE_CHECKING 

35 

36from lsst.sphgeom import Region 

37 

38from .._exceptions import InvalidQueryError 

39from ..dimensions import Dimension, DimensionElement, DimensionUniverse 

40from . import tree 

41 

42if TYPE_CHECKING: 

43 from .._timespan import Timespan 

44 from ._query import Query 

45 

46# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression, 

47# but it just returns OrderExpression and Predicate objects directly, because 

48# we don't need to overload any operators or define any methods on those. 

49 

50 

class ExpressionProxy(ABC):
    """Abstract base class for objects that wrap a column expression.

    Concrete subclasses provide the wrapped expression via the private
    ``_expression`` property; the helpers defined here turn proxies and
    plain values into expression-tree objects.
    """

    @property
    @abstractmethod
    def _expression(self) -> tree.ColumnExpression:
        # Subclasses return the column expression that backs the proxy.
        raise NotImplementedError()

    def __repr__(self) -> str:
        wrapped = self._expression
        return str(wrapped)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that is true when this expression is NULL."""
        return tree.Predicate.is_null(self._expression)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        # Anything that is not already a proxy is interpreted as a literal
        # value; proxies are simply unwrapped.
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        # This proxy is always the left-hand operand of the comparison.
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

78 

79 

class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` for expressions that hold a single scalar value.

    Comparison operators return `tree.Predicate` objects, while arithmetic
    operators return new `ScalarExpressionProxy` objects wrapping the
    combined expression.
    """

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression that requests a reversed sort on this
        expression.
        """
        operand = self._expression
        return tree.Reversed(operand=operand)

    # Comparison operators: each delegates to ``_make_comparison`` with this
    # proxy as the left-hand operand.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # Arithmetic operators: the forward variants put this proxy on the left,
    # the reflected (``__r*__``) variants put the converted ``other`` on the
    # left.

    def __neg__(self) -> ScalarExpressionProxy:
        negated = tree.UnaryExpression(operand=self._expression, operator="-")
        return ResolvedScalarExpressionProxy(negated)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="+"))

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="-"))

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="*"))

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="/"))

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._expression
        rhs = self._make_expression(other)
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        lhs = self._make_expression(other)
        rhs = self._expression
        return ResolvedScalarExpressionProxy(tree.BinaryExpression(a=lhs, b=rhs, operator="%"))

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Build a boolean expression testing whether this expression falls
        in a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) of the range.
        stop : `int` or `None`, optional
            Upper bound (exclusive) of the range, or `None` for no upper
            bound.
        step : `int`, optional
            Spacing between consecutive integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        return tree.Predicate.in_range(target, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Build a boolean expression testing whether this expression
        evaluates to one of the given values.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            Items to test against; each may be an `ExpressionProxy` or a
            value to be interpreted as a literal.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        members = list(map(self._make_expression, others))
        return tree.Predicate.in_container(target, members)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Build a boolean expression testing whether this expression
        evaluates to a value found in one column of another query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        target = self._expression
        selected = column._expression
        return tree.Predicate.in_query(target, selected, query._tree)

216 

217 

class ResolvedScalarExpressionProxy(ScalarExpressionProxy):
    """A `ScalarExpressionProxy` that simply holds the expression it was
    constructed with.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        # Stored under a different name because ``_expression`` is the
        # read-only property that exposes it.
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Hand back exactly what was passed to the constructor.
        return self._expr

233 

234 

class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` for timespan-valued columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Hand back exactly what was passed to the constructor.
        return self._expr

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        lower = tree.UnaryExpression(operand=self._expression, operator="begin_of")
        return ResolvedScalarExpressionProxy(lower)

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        upper = tree.UnaryExpression(operand=self._expression, operator="end_of")
        return ResolvedScalarExpressionProxy(upper)

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Build a boolean expression that tests whether this timespan
        overlaps another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

280 

281 

class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` for region-valued columns and literals.

    Parameters
    ----------
    expression : `.tree.ColumnExpression`
        Expression wrapped by this proxy.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expr = expression

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Hand back exactly what was passed to the constructor.
        return self._expr

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a boolean expression that tests whether this region
        overlaps another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, operator="overlaps")

313 

314 

class DimensionElementProxy(ScalarExpressionProxy):
    """An expression-creation proxy for a dimension element logical table.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object wraps.

    Notes
    -----
    The (dynamic) attributes of this object are expression proxies for the
    fields of the element's records.  Attributes named after dimensions in
    the element's schema resolve to `DimensionElementProxy` objects; other
    fields resolve to scalar proxies, except ``region`` and ``timespan``,
    which have dedicated properties returning specialized proxies.
    """

    def __init__(self, element: DimensionElement):
        self._element = element

    @property
    def _expression(self) -> tree.ColumnExpression:
        # Only true dimensions have a key column that can stand in for the
        # element itself in an expression.
        if isinstance(self._element, Dimension):
            return tree.DimensionKeyReference(dimension=self._element)
        raise TypeError(f"Proxy expression {self!r} does not resolve to a column.")

    def __repr__(self) -> str:
        return self._element.name

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        # __getattr__ is only consulted after normal lookup fails, so being
        # asked for ``_element`` means the instance was never initialized
        # (e.g. while being copied or unpickled).  Fail immediately rather
        # than recursing forever through the ``self._element`` reads below.
        if field == "_element":
            raise AttributeError(field)
        if field in self._element.schema.dimensions.names:
            if field not in self._element.dimensions.names:
                # This is a dimension self-reference, like visit.id.
                return self
            return DimensionElementProxy(self._element.dimensions[field])
        try:
            expression = tree.DimensionFieldReference(element=self._element, field=field)
        except InvalidQueryError as err:
            # Report unknown fields the way attribute access is expected to.
            raise AttributeError(field) from err
        return ResolvedScalarExpressionProxy(expression)

    @property
    def region(self) -> RegionProxy:
        """Proxy for this element's ``region`` field.

        Raises `AttributeError` if the field reference cannot be built.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="region")
        except InvalidQueryError as err:
            raise AttributeError("region") from err
        return RegionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """Proxy for this element's ``timespan`` field.

        Raises `AttributeError` if the field reference cannot be built.
        """
        try:
            expression = tree.DimensionFieldReference(element=self._element, field="timespan")
        except InvalidQueryError as err:
            raise AttributeError("timespan") from err
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        # We only want timespan and region to appear in dir() for elements that
        # have them, but we can't implement them in getattr without muddling
        # the type annotations.
        result = [entry for entry in super().__dir__() if entry not in ("timespan", "region")]
        result.extend(self._element.schema.names)
        return result

377 

378 

class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard.  Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # Attributes are actually fixed, but we implement them with __getattr__
    # and __dir__ to avoid repeating the list.  And someday they might expand
    # to include Datastore record fields.

    def __getattr__(self, field: str) -> ScalarExpressionProxy:
        if field not in tree.DATASET_FIELD_NAMES:
            raise AttributeError(field)
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
        return ResolvedScalarExpressionProxy(expression)

    @property
    def timespan(self) -> TimespanProxy:
        """Proxy for the dataset ``timespan`` field."""
        # This must be a dataset field reference built from the dataset type
        # name.  This class has no ``_element`` attribute, so referencing one
        # here raises AttributeError inside the property, which makes Python
        # fall back to __getattr__ and return a plain scalar proxy without
        # ``overlaps``/``begin``/``end``.
        expression = tree.DatasetFieldReference(dataset_type=self._dataset_type, field="timespan")
        return TimespanProxy(expression)

    def __dir__(self) -> list[str]:
        result = list(super().__dir__())
        # "timespan" will be added by delegation to super() and we don't want
        # it to appear twice.
        result.extend(name for name in tree.DATASET_FIELD_NAMES if name != "timespan")
        return result

424 

425 

426class ExpressionFactory: 

427 """A factory for creating column expressions that uses operator overloading 

428 to form a mini-language. 

429 

430 Instances of this class are usually obtained from 

431 `Query.expression_factory`; see that property's documentation for more 

432 information. 

433 

434 Parameters 

435 ---------- 

436 universe : `DimensionUniverse` 

437 Object that describes all dimensions. 

438 """ 

439 

440 def __init__(self, universe: DimensionUniverse): 

441 self._universe = universe 

442 

443 def __getattr__(self, name: str) -> DimensionElementProxy: 

444 try: 

445 element = self._universe.elements[name] 

446 except KeyError: 

447 raise AttributeError(name) 

448 return DimensionElementProxy(element) 

449 

450 def __getitem__(self, name: str) -> DatasetTypeProxy: 

451 return DatasetTypeProxy(name) 

452 

453 def not_(self, operand: tree.Predicate) -> tree.Predicate: 

454 """Apply a logical NOT operation to a boolean expression. 

455 

456 Parameters 

457 ---------- 

458 operand : `tree.Predicate` 

459 Expression to invetree. 

460 

461 Returns 

462 ------- 

463 logical_not : `tree.Predicate` 

464 A boolean expression that evaluates to the opposite of ``operand``. 

465 """ 

466 return operand.logical_not() 

467 

468 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

469 """Combine a sequence of boolean expressions with logical AND. 

470 

471 Parameters 

472 ---------- 

473 first : `tree.Predicate` 

474 First operand (required). 

475 *args 

476 Additional operands. 

477 

478 Returns 

479 ------- 

480 logical_and : `tree.Predicate` 

481 A boolean expression that evaluates to `True` only if all operands 

482 evaluate to `True. 

483 """ 

484 return first.logical_and(*args) 

485 

486 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

487 """Combine a sequence of boolean expressions with logical OR. 

488 

489 Parameters 

490 ---------- 

491 first : `tree.Predicate` 

492 First operand (required). 

493 *args 

494 Additional operands. 

495 

496 Returns 

497 ------- 

498 logical_or : `tree.Predicate` 

499 A boolean expression that evaluates to `True` if any operand 

500 evaluates to `True. 

501 """ 

502 return first.logical_or(*args) 

503 

504 @staticmethod 

505 def literal(value: object) -> ExpressionProxy: 

506 """Return an expression proxy that represents a literal value. 

507 

508 Expression proxy objects obtained from this factory can generally be 

509 compared directly to literals, so calling this method directly in user 

510 code should rarely be necessary. 

511 

512 Parameters 

513 ---------- 

514 value : `object` 

515 Value to include as a literal in an expression tree. 

516 

517 Returns 

518 ------- 

519 expression : `ExpressionProxy` 

520 Expression wrapper for this literal. 

521 """ 

522 expression = tree.make_column_literal(value) 

523 match expression.expression_type: 

524 case "timespan": 

525 return TimespanProxy(expression) 

526 case "region": 

527 return RegionProxy(expression) 

528 case "bool": 

529 raise NotImplementedError("Boolean literals are not supported.") 

530 case _: 

531 return ResolvedScalarExpressionProxy(expression) 

532 

533 @staticmethod 

534 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression: 

535 """Return the column expression object that backs a proxy. 

536 

537 Parameters 

538 ---------- 

539 proxy : `ExpressionProxy` 

540 Proxy constructed via an `ExpressionFactory`. 

541 

542 Returns 

543 ------- 

544 expression : `tree.ColumnExpression` 

545 Underlying column expression object. 

546 """ 

547 return proxy._expression