Coverage for python/lsst/daf/butler/queries/expression_factory.py: 44%

156 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ExpressionFactory", "ExpressionProxy", "ScalarExpressionProxy", "TimespanProxy", "RegionProxy") 

31 

32from collections.abc import Iterable 

33from typing import TYPE_CHECKING, cast 

34 

35from lsst.sphgeom import Region 

36 

37from ..dimensions import Dimension, DimensionElement, DimensionUniverse 

38from . import tree 

39 

40if TYPE_CHECKING: 

41 from .._timespan import Timespan 

42 from ._query import Query 

43 

44# This module uses ExpressionProxy and its subclasses to wrap ColumnExpression, 

45# but it just returns OrderExpression and Predicate objects directly, because 

46# we don't need to overload any operators or define any methods on those. 

47 

48 

class ExpressionProxy:
    """Base wrapper around a column expression.

    This class holds the wrapped expression, provides a NULL test, and
    supplies the helpers that subclasses use to build comparison predicates.

    Parameters
    ----------
    expression : `tree.ColumnExpression`
        Underlying expression object.
    """

    def __init__(self, expression: tree.ColumnExpression):
        self._expression = expression

    def __repr__(self) -> str:
        # The proxy has no state of its own; show the wrapped expression.
        wrapped = self._expression
        return str(wrapped)

    @property
    def is_null(self) -> tree.Predicate:
        """A boolean expression that tests whether this expression is NULL."""
        wrapped = self._expression
        return tree.Predicate.is_null(wrapped)

    @staticmethod
    def _make_expression(other: object) -> tree.ColumnExpression:
        """Convert an operand to a column expression.

        Parameters
        ----------
        other : `object`
            Either an `ExpressionProxy`, which is unwrapped, or any other
            object, which is passed to `tree.make_column_literal`.

        Returns
        -------
        expression : `tree.ColumnExpression`
            Expression usable as an operand in the expression tree.
        """
        if not isinstance(other, ExpressionProxy):
            return tree.make_column_literal(other)
        return other._expression

    def _make_comparison(self, other: object, operator: tree.ComparisonOperator) -> tree.Predicate:
        """Build a predicate comparing this expression to another operand.

        Parameters
        ----------
        other : `object`
            Right-hand operand; a proxy or a value to treat as a literal.
        operator : `tree.ComparisonOperator`
            Comparison operator to apply.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        lhs = self._expression
        rhs = self._make_expression(other)
        return tree.Predicate.compare(a=lhs, b=rhs, operator=operator)

79 

80 

class ScalarExpressionProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for simple single-value columns.

    Comparison operators return `tree.Predicate` objects and arithmetic
    operators return new `ScalarExpressionProxy` objects, so neither kind of
    operator yields a plain Python value.
    """

    # NOTE: ``__eq__`` is overridden without ``__hash__``, so Python sets
    # ``__hash__`` to `None` and instances of this class are unhashable.

    @property
    def desc(self) -> tree.Reversed:
        """An ordering expression that indicates that the sort on this
        expression should be reversed.
        """
        wrapped = self._expression
        return tree.Reversed(operand=wrapped)

    # Comparison operators: each delegates to the shared comparison helper.

    def __eq__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="==")

    def __ne__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="!=")

    def __lt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<")

    def __le__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator="<=")

    def __gt__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">")

    def __ge__(self, other: object) -> tree.Predicate:  # type: ignore[override]
        return self._make_comparison(other, operator=">=")

    # Arithmetic operators: the reflected (``__r*__``) variants place the
    # other operand on the left-hand side of the binary expression.

    def __neg__(self) -> ScalarExpressionProxy:
        node = tree.UnaryExpression(operand=self._expression, operator="-")
        return ScalarExpressionProxy(node)

    def __add__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._expression, b=self._make_expression(other), operator="+")
        return ScalarExpressionProxy(node)

    def __radd__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._make_expression(other), b=self._expression, operator="+")
        return ScalarExpressionProxy(node)

    def __sub__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._expression, b=self._make_expression(other), operator="-")
        return ScalarExpressionProxy(node)

    def __rsub__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._make_expression(other), b=self._expression, operator="-")
        return ScalarExpressionProxy(node)

    def __mul__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._expression, b=self._make_expression(other), operator="*")
        return ScalarExpressionProxy(node)

    def __rmul__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._make_expression(other), b=self._expression, operator="*")
        return ScalarExpressionProxy(node)

    def __truediv__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._expression, b=self._make_expression(other), operator="/")
        return ScalarExpressionProxy(node)

    def __rtruediv__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._make_expression(other), b=self._expression, operator="/")
        return ScalarExpressionProxy(node)

    def __mod__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._expression, b=self._make_expression(other), operator="%")
        return ScalarExpressionProxy(node)

    def __rmod__(self, other: object) -> ScalarExpressionProxy:
        node = tree.BinaryExpression(a=self._make_expression(other), b=self._expression, operator="%")
        return ScalarExpressionProxy(node)

    def in_range(self, start: int = 0, stop: int | None = None, step: int = 1) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression is
        within a literal integer range.

        Parameters
        ----------
        start : `int`, optional
            Lower bound (inclusive) for the slice.
        stop : `int` or `None`, optional
            Upper bound (exclusive) for the slice, or `None` for no bound.
        step : `int`, optional
            Spacing between integers in the range.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        wrapped = self._expression
        return tree.Predicate.in_range(wrapped, start=start, stop=stop, step=step)

    def in_iterable(self, others: Iterable) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in an iterable of other expressions.

        Parameters
        ----------
        others : `collections.abc.Iterable`
            An iterable of `ExpressionProxy` or values to be interpreted as
            literals.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        # Unwrap proxies and convert plain values to literals up front.
        members = list(map(self._make_expression, others))
        return tree.Predicate.in_container(self._expression, members)

    def in_query(self, column: ExpressionProxy, query: Query) -> tree.Predicate:
        """Return a boolean expression that tests whether this expression
        evaluates to a value that is in a single-column selection from another
        query.

        Parameters
        ----------
        column : `ExpressionProxy`
            Proxy for the column to extract from ``query``.
        query : `Query`
            Query to select from.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        selected = column._expression
        return tree.Predicate.in_query(self._expression, selected, query._tree)

217 

218 

class TimespanProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for timespan columns and literals."""

    # The bounds are wrapped in `ScalarExpressionProxy` rather than the base
    # `ExpressionProxy`: the base class defines no comparison operators, so a
    # base-class bound would make ``ts.begin == t`` fall back to Python's
    # identity comparison and ``ts.begin < t`` raise `TypeError`.
    # `ScalarExpressionProxy` is a subclass of `ExpressionProxy`, so existing
    # ``isinstance`` checks keep working.

    @property
    def begin(self) -> ScalarExpressionProxy:
        """An expression representing the lower bound (inclusive)."""
        return ScalarExpressionProxy(tree.UnaryExpression(operand=self._expression, operator="begin_of"))

    @property
    def end(self) -> ScalarExpressionProxy:
        """An expression representing the upper bound (exclusive)."""
        return ScalarExpressionProxy(tree.UnaryExpression(operand=self._expression, operator="end_of"))

    def overlaps(self, other: TimespanProxy | Timespan) -> tree.Predicate:
        """Return a boolean expression representing an overlap test between
        this timespan and another.

        Parameters
        ----------
        other : `TimespanProxy` or `Timespan`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        return self._make_comparison(other, "overlaps")

247 

248 

class RegionProxy(ExpressionProxy):
    """An `ExpressionProxy` specialized for region columns and literals."""

    def overlaps(self, other: RegionProxy | Region) -> tree.Predicate:
        """Build a predicate that tests whether this region overlaps another.

        Parameters
        ----------
        other : `RegionProxy` or `Region`
            Expression or literal to compare to.

        Returns
        -------
        predicate : `tree.Predicate`
            Boolean expression object.
        """
        predicate = self._make_comparison(other, operator="overlaps")
        return predicate

267 

268 

269class DimensionElementProxy: 

270 """An expression-creation proxy for a dimension element logical table. 

271 

272 Parameters 

273 ---------- 

274 element : `DimensionElement` 

275 Element this object wraps. 

276 

277 Notes 

278 ----- 

279 The (dynamic) attributes of this object are expression proxies for the 

280 non-dimension fields of the element's records. 

281 """ 

282 

283 def __init__(self, element: DimensionElement): 

284 self._element = element 

285 

286 def __repr__(self) -> str: 

287 return self._element.name 

288 

289 def __getattr__(self, field: str) -> ExpressionProxy: 

290 if field in self._element.schema.dimensions.names: 

291 return DimensionProxy(self._element.dimensions[field]) 

292 try: 

293 expression = tree.DimensionFieldReference(element=self._element, field=field) 

294 except tree.InvalidQueryError: 

295 raise AttributeError(field) 

296 match expression.column_type: 

297 case "region": 

298 return RegionProxy(expression) 

299 case "timespan": 

300 return TimespanProxy(expression) 

301 return ScalarExpressionProxy(expression) 

302 

303 def __dir__(self) -> list[str]: 

304 result = list(super().__dir__()) 

305 result.extend(self._element.schema.names) 

306 return result 

307 

308 

class DimensionProxy(ScalarExpressionProxy, DimensionElementProxy):
    """An expression-creation proxy for a dimension logical table.

    Parameters
    ----------
    dimension : `Dimension`
        Dimension this object wraps.

    Notes
    -----
    This class combines record-field attribute access from
    `DimensionElementProxy` with direct interpretation as a dimension key
    column via `ScalarExpressionProxy`. For example::

        x = query.expression_factory
        query.where(
            x.detector.purpose == "SCIENCE",  # field access
            x.detector > 100,  # direct usage as an expression
        )
    """

    # Narrows the type of the attribute set by DimensionElementProxy.__init__.
    _element: Dimension

    def __init__(self, dimension: Dimension):
        # The two bases take different constructor arguments, so each one is
        # initialized explicitly rather than through a cooperative super().
        ScalarExpressionProxy.__init__(self, tree.DimensionKeyReference(dimension=dimension))
        DimensionElementProxy.__init__(self, dimension)

    def __getattr__(self, field: str) -> ExpressionProxy:
        """Return an expression proxy for a field of this dimension.

        Parameters
        ----------
        field : `str`
            Name of the dimension or record field to reference.

        Returns
        -------
        proxy : `ExpressionProxy`
            This object itself if ``field`` is the name of the dimension's
            primary key; otherwise whatever the base class returns.

        Raises
        ------
        AttributeError
            Raised if ``field`` is not a valid field of this dimension, or if
            this object has not been initialized.
        """
        # __getattr__ only runs when normal lookup fails, so being asked for
        # ``_element`` means __init__ has not run yet (e.g. while the object
        # is being copied or unpickled). Reading ``self._element`` below would
        # then re-enter this method forever, so fail fast instead.
        if field == "_element":
            raise AttributeError(field)
        if field == self._element.primary_key.name:
            return self
        return super().__getattr__(field)

340 

341 

class DatasetTypeProxy:
    """An expression-creation proxy for a dataset type's logical table.

    Parameters
    ----------
    dataset_type : `str`
        Dataset type name or wildcard. Wildcards are usable only when the
        query contains exactly one dataset type or a wildcard.

    Notes
    -----
    The attributes of this object are expression proxies for the fields
    associated with datasets.
    """

    def __init__(self, dataset_type: str):
        self._dataset_type = dataset_type

    def __repr__(self) -> str:
        return self._dataset_type

    # The set of attributes is fixed, but it is served through __getattr__
    # and __dir__ so the field list lives in one place
    # (``tree.DATASET_FIELD_NAMES``). It may someday grow to include
    # Datastore record fields.

    def __getattr__(self, field: str) -> ExpressionProxy:
        """Return an expression proxy for one of the dataset fields.

        Parameters
        ----------
        field : `str`
            Name of the dataset field to reference.

        Returns
        -------
        proxy : `ExpressionProxy`
            A `TimespanProxy` for timespan-typed fields and a
            `ScalarExpressionProxy` for all others.

        Raises
        ------
        AttributeError
            Raised if ``field`` is not in ``tree.DATASET_FIELD_NAMES``.
        """
        if field in tree.DATASET_FIELD_NAMES:
            reference = tree.DatasetFieldReference(dataset_type=self._dataset_type, field=field)
            if reference.column_type == "timespan":
                return TimespanProxy(reference)
            return ScalarExpressionProxy(reference)
        raise AttributeError(field)

    def __dir__(self) -> list[str]:
        # Regular attributes first, then the dynamically served field names.
        return [*super().__dir__(), *tree.DATASET_FIELD_NAMES]

380 

381 

382class ExpressionFactory: 

383 """A factory for creating column expressions that uses operator overloading 

384 to form a mini-language. 

385 

386 Instances of this class are usually obtained from 

387 `Query.expression_factory`; see that property's documentation for more 

388 information. 

389 

390 Parameters 

391 ---------- 

392 universe : `DimensionUniverse` 

393 Object that describes all dimensions. 

394 """ 

395 

396 def __init__(self, universe: DimensionUniverse): 

397 self._universe = universe 

398 

399 def __getattr__(self, name: str) -> DimensionElementProxy: 

400 element = self._universe.elements[name] 

401 if element in self._universe.dimensions: 

402 return DimensionProxy(cast(Dimension, element)) 

403 return DimensionElementProxy(element) 

404 

405 def __getitem__(self, name: str) -> DatasetTypeProxy: 

406 return DatasetTypeProxy(name) 

407 

408 def not_(self, operand: tree.Predicate) -> tree.Predicate: 

409 """Apply a logical NOT operation to a boolean expression. 

410 

411 Parameters 

412 ---------- 

413 operand : `tree.Predicate` 

414 Expression to invetree. 

415 

416 Returns 

417 ------- 

418 logical_not : `tree.Predicate` 

419 A boolean expression that evaluates to the opposite of ``operand``. 

420 """ 

421 return operand.logical_not() 

422 

423 def all(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

424 """Combine a sequence of boolean expressions with logical AND. 

425 

426 Parameters 

427 ---------- 

428 first : `tree.Predicate` 

429 First operand (required). 

430 *args 

431 Additional operands. 

432 

433 Returns 

434 ------- 

435 logical_and : `tree.Predicate` 

436 A boolean expression that evaluates to `True` only if all operands 

437 evaluate to `True. 

438 """ 

439 return first.logical_and(*args) 

440 

441 def any(self, first: tree.Predicate, /, *args: tree.Predicate) -> tree.Predicate: 

442 """Combine a sequence of boolean expressions with logical OR. 

443 

444 Parameters 

445 ---------- 

446 first : `tree.Predicate` 

447 First operand (required). 

448 *args 

449 Additional operands. 

450 

451 Returns 

452 ------- 

453 logical_or : `tree.Predicate` 

454 A boolean expression that evaluates to `True` if any operand 

455 evaluates to `True. 

456 """ 

457 return first.logical_or(*args) 

458 

459 @staticmethod 

460 def literal(value: object) -> ExpressionProxy: 

461 """Return an expression proxy that represents a literal value. 

462 

463 Expression proxy objects obtained from this factory can generally be 

464 compared directly to literals, so calling this method directly in user 

465 code should rarely be necessary. 

466 

467 Parameters 

468 ---------- 

469 value : `object` 

470 Value to include as a literal in an expression tree. 

471 

472 Returns 

473 ------- 

474 expression : `ExpressionProxy` 

475 Expression wrapper for this literal. 

476 """ 

477 expression = tree.make_column_literal(value) 

478 match expression.expression_type: 

479 case "timespan": 

480 return TimespanProxy(expression) 

481 case "region": 

482 return RegionProxy(expression) 

483 case "bool": 

484 raise NotImplementedError("Boolean literals are not supported.") 

485 case _: 

486 return ScalarExpressionProxy(expression) 

487 

488 @staticmethod 

489 def unwrap(proxy: ExpressionProxy) -> tree.ColumnExpression: 

490 """Return the column expression object that backs a proxy. 

491 

492 Parameters 

493 ---------- 

494 proxy : `ExpressionProxy` 

495 Proxy constructed via an `ExpressionFactory`. 

496 

497 Returns 

498 ------- 

499 expression : `tree.ColumnExpression` 

500 Underlying column expression object. 

501 """ 

502 return proxy._expression