Coverage for python/lsst/daf/relation/_columns/_expression.py: 62%

129 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-22 02:59 -0800

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "ColumnExpression", 

26 "ColumnLiteral", 

27 "ColumnReference", 

28 "ColumnFunction", 

29 "PredicateFunction", 

30) 

31 

32import dataclasses 

33from abc import ABC, abstractmethod 

34from collections.abc import Iterable, Set 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.utils.classes import cached_getter 

38 

39from .._exceptions import RelationalAlgebraError 

40from ._predicate import Predicate 

41from ._tag import ColumnTag 

42 

43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true

44 from .._engine import Engine 

45 

46 

class ColumnExpression(ABC):
    """An abstract base class and factory for scalar, non-boolean column
    expressions.

    `ColumnExpression` inheritance is closed to the types already provided by
    this package, but considerable custom behavior can still be provided via
    the `ColumnFunction` class and an `Engine` that knows how to interpret its
    `~ColumnFunction.name` value.  These concrete types can all be constructed
    via factory methods on `ColumnExpression` itself, so the derived types
    themselves only need to be referenced when writing `match` expressions that
    process an expression tree.  See
    :ref:`lsst.daf.relation-overview-extensibility` for rationale and details.
    """

    def __init_subclass__(cls) -> None:
        # The hierarchy is deliberately closed: only the three concrete
        # expression types named here may derive from this class.
        # NOTE: this is an ``assert``, so the check is skipped when Python
        # runs with optimizations enabled (``-O``).
        assert cls.__name__ in {
            "ColumnLiteral",
            "ColumnReference",
            "ColumnFunction",
        }, "ColumnExpression inheritance is closed to predefined types in daf_relation."

    dtype: type | None
    """The Python type this expression evaluates to (`type` or `None`).

    Interpretation of this attribute is up to the `Engine` or other algorithms
    that operate on the expression tree; it is ignored by all code in the
    `lsst.daf.relation` package.
    """

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        """Columns required by this expression
        (`~collections.abc.Set` [ `ColumnTag` ]).

        This includes columns required by expressions nested within this one.
        """
        raise NotImplementedError()

    @abstractmethod
    def is_supported_by(self, engine: Engine) -> bool:
        """Test whether the given engine is capable of evaluating this
        expression.

        Parameters
        ----------
        engine : `Engine`
            Engine to test.

        Returns
        -------
        supported : `bool`
            Whether the engine supports this expression and all expressions
            nested within it.
        """
        raise NotImplementedError()

    @classmethod
    def literal(cls, value: Any, dtype: type | None = None) -> ColumnLiteral:
        """Construct an expression backed by a regular Python object.

        Parameters
        ----------
        value
            Value for the expression.
        dtype : `type` or `None`, optional
            The Python type this expression evaluates to (`type` or `None`).

        Returns
        -------
        literal : `ColumnLiteral`
            A column expression backed by the given value.
        """
        return ColumnLiteral(value, dtype)

    @classmethod
    def reference(cls, tag: ColumnTag, dtype: type | None = None) -> ColumnReference:
        """Construct an expression that refers to a column in a relation.

        Parameters
        ----------
        tag : `ColumnTag`
            Identifier for the column to reference.
        dtype : `type` or `None`, optional
            The Python type this expression evaluates to (`type` or `None`).

        Returns
        -------
        reference : `ColumnReference`
            A column expression that refers to the given relation column.
        """
        return ColumnReference(tag, dtype)

    def method(
        self,
        name: str,
        *args: ColumnExpression,
        dtype: type | None = None,
        supporting_engine_types: Iterable[type[Engine]] | None = None,
    ) -> ColumnFunction:
        """Construct an expression that represents a method call with
        expression arguments.

        Parameters
        ----------
        name : `str`
            Name of the method, to be interpreted by the `Engine` or other
            algorithm.
        *args : `ColumnExpression`
            Expressions to pass as arguments to the method (after ``self``).
        dtype : `type` or `None`, optional
            The Python type this expression evaluates to (`type` or `None`).
        supporting_engine_types : `~collections.abc.Iterable` [ `type` ], \
                optional
            If provided, the set of `Engine` types that are expected to support
            this expression.  If `None` (default), all engines are assumed to
            support it.

        Returns
        -------
        function : `ColumnFunction`
            Column expression that represents this function call.

        Notes
        -----
        `ColumnExpression` cannot actually force an engine to interpret the
        given name as the name of a method rather than something else; calling
        this method like this::

            a.method("name", b)

        is exactly equivalent to::

            ColumnExpression.function("name", a, b)

        The provided `iteration` and `sql` engines both interpret these names
        as method names if and only if they are not first found in the
        built-in `operator` module.
        """
        # `function` materializes ``supporting_engine_types`` into a tuple
        # itself, so the iterable is forwarded untouched instead of being
        # converted twice.
        return self.function(
            name,
            self,
            *args,
            dtype=dtype,
            supporting_engine_types=supporting_engine_types,
        )

    @classmethod
    def function(
        cls,
        name: str,
        *args: ColumnExpression,
        dtype: type | None = None,
        supporting_engine_types: Iterable[type[Engine]] | None = None,
    ) -> ColumnFunction:
        """Construct an expression that represents a function call with
        expression arguments.

        Parameters
        ----------
        name : `str`
            Name of the function, to be interpreted by the `Engine` or other
            algorithm.
        *args : `ColumnExpression`
            Expressions to pass as arguments to the function (not including
            ``self``; this is a `classmethod`, so it never has access to
            ``self``).
        dtype : `type` or `None`, optional
            The Python type this expression evaluates to (`type` or `None`).
        supporting_engine_types : `~collections.abc.Iterable` [ `type` ], \
                optional
            If provided, the set of `Engine` types that are expected to support
            this expression.  If `None` (default), all engines are assumed to
            support it.

        Returns
        -------
        function : `ColumnFunction`
            Column expression that represents this function call.
        """
        return ColumnFunction(
            name,
            args,
            dtype,
            # Stored as a tuple so it can be handed directly to `isinstance`.
            supporting_engine_types=(
                tuple(supporting_engine_types) if supporting_engine_types is not None else None
            ),
        )

    def eq(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean equality-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__eq__", other)

    def ne(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean inequality-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__ne__", other)

    def lt(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean less-than-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__lt__", other)

    def gt(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean greater-than-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__gt__", other)

    def le(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean less-or-equal-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__le__", other)

    def ge(self, other: ColumnExpression) -> PredicateFunction:
        """Construct a boolean greater-or-equal-comparison expression.

        Parameters
        ----------
        other : `ColumnExpression`
            Expression whose value will be compared to that of ``self``.

        Returns
        -------
        comparison : `PredicateFunction`
            Boolean column expression.
        """
        return self.predicate_method("__ge__", other)

    def predicate_method(
        self,
        name: str,
        *args: ColumnExpression,
        supporting_engine_types: Iterable[type[Engine]] | None = None,
    ) -> PredicateFunction:
        """Construct an expression that represents a method call with
        expression arguments and a boolean result.

        Parameters
        ----------
        name : `str`
            Name of the method, to be interpreted by the `Engine` or other
            algorithm.
        *args : `ColumnExpression`
            Expressions to pass as arguments to the method (after ``self``).
        supporting_engine_types : `~collections.abc.Iterable` [ `type` ], \
                optional
            If provided, the set of `Engine` types that are expected to support
            this expression.  If `None` (default), all engines are assumed to
            support it.

        Returns
        -------
        function : `PredicateFunction`
            Boolean column expression that represents this function call.

        Notes
        -----
        `ColumnExpression` cannot actually force an engine to interpret the
        given name as the name of a method rather than something else; calling
        this method like this::

            a.predicate_method("name", b)

        is exactly equivalent to::

            ColumnExpression.predicate_function("name", a, b)

        The provided `iteration` and `sql` engines both interpret these names
        as method names if and only if they are not first found in the
        built-in `operator` module.
        """
        # `predicate_function` materializes ``supporting_engine_types`` into a
        # tuple itself, so the iterable is forwarded untouched.
        return self.predicate_function(
            name,
            self,
            *args,
            supporting_engine_types=supporting_engine_types,
        )

    @classmethod
    def predicate_function(
        cls,
        name: str,
        *args: ColumnExpression,
        supporting_engine_types: Iterable[type[Engine]] | None = None,
    ) -> PredicateFunction:
        """Construct an expression that represents a function call with
        expression arguments and a boolean result.

        Parameters
        ----------
        name : `str`
            Name of the function, to be interpreted by the `Engine` or other
            algorithm.
        *args : `ColumnExpression`
            Expressions to pass as arguments to the function (not including
            ``self``; this is a `classmethod`, so it never has access to
            ``self``).
        supporting_engine_types : `~collections.abc.Iterable` [ `type` ], \
                optional
            If provided, the set of `Engine` types that are expected to support
            this expression.  If `None` (default), all engines are assumed to
            support it.

        Returns
        -------
        function : `PredicateFunction`
            Boolean column expression that represents this function call.
        """
        return PredicateFunction(
            name,
            args,
            # Stored as a tuple so it can be handed directly to `isinstance`.
            supporting_engine_types=(
                tuple(supporting_engine_types) if supporting_engine_types is not None else None
            ),
        )

421 

422 

@dataclasses.dataclass(frozen=True)
class ColumnLiteral(ColumnExpression):
    """A concrete column expression that wraps a plain Python value."""

    value: Any
    """The wrapped Python value."""

    dtype: type | None
    """The Python type this expression evaluates to (`type` or `None`)."""

    def __str__(self) -> str:
        # Rendered with `repr` of the wrapped value.
        return f"{self.value!r}"

    @property
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # A literal never refers to a relation column.
        return frozenset(())

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        # Unconditionally reported as supported, whatever the engine.
        return True

444 

445 

@dataclasses.dataclass(frozen=True)
class ColumnReference(ColumnExpression):
    """A concrete column expression that points at a column of a relation."""

    tag: ColumnTag
    """Identifier of the referenced column (`ColumnTag`)."""

    dtype: type | None
    """The Python type this expression evaluates to (`type` or `None`)."""

    def __str__(self) -> str:
        # The string form is simply that of the tag.
        label = str(self.tag)
        return label

    @property
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # Exactly one column is needed: the referenced one.  A fresh set is
        # built on every access.
        required: set[ColumnTag] = set()
        required.add(self.tag)
        return required

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        # Unconditionally reported as supported, whatever the engine.
        return True

467 

468 

# Infix symbols used when rendering comparison predicates as strings, keyed
# by the comparison dunder-method name.
_OPERATOR_STRINGS: dict[str, str] = {
    "__eq__": "=",
    "__ne__": "≠",
    "__lt__": "<",
    "__le__": "≤",
    "__gt__": ">",
    "__ge__": "≥",
}

477 

478 

@dataclasses.dataclass(frozen=True)
class ColumnFunction(ColumnExpression):
    """A concrete column expression representing a call to a named function
    whose arguments are themselves column expressions.
    """

    name: str
    """Name of the function to apply (`str`).

    How this name is interpreted is left entirely to the `Engine` or other
    relation-processing algorithm.
    """

    args: tuple[ColumnExpression, ...]
    """Column expressions passed as arguments to the function
    (`tuple` [ `ColumnExpression`, ... ]).
    """

    dtype: type | None
    """The Python type this expression evaluates to (`type` or `None`)."""

    supporting_engine_types: tuple[type[Engine], ...] | None = dataclasses.field(compare=False)
    """The `Engine` types that are expected to support this expression
    (`tuple` [ `type` [ `Engine` ], ... ]), or `None` for no restriction.
    """

    def __post_init__(self) -> None:
        # At least one argument is mandatory.
        if self.args:
            return
        raise RelationalAlgebraError(f"No arguments for function {self.name}.")

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # Union of the columns required by every argument.
        required: set[ColumnTag] = set()
        return required.union(*(operand.columns_required for operand in self.args))

    def __str__(self) -> str:
        # Rendered in call notation: ``name(arg0, arg1, ...)``.
        arguments = ", ".join(map(str, self.args))
        return f"{self.name}({arguments})"

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        engine_types = self.supporting_engine_types
        # A restriction on engine types, when present, is checked first.
        if engine_types is not None and not isinstance(engine, engine_types):
            return False
        # Every nested argument must be supported as well.
        for operand in self.args:
            if not operand.is_supported_by(engine):
                return False
        return True

526 

527 

@dataclasses.dataclass(frozen=True)
class PredicateFunction(Predicate):
    """A concrete boolean expression that represents calling a named function
    with column expression arguments.
    """

    name: str
    """Name of the function to apply (`str`).

    Interpretation of this name is entirely up to the `Engine` or other
    relation-processing algorithm.
    """

    args: tuple[ColumnExpression, ...]
    """Column expressions to pass as arguments to the function
    (`tuple` [ `ColumnExpression`, ... ]).
    """

    supporting_engine_types: tuple[type[Engine], ...] | None = dataclasses.field(compare=False)
    """The set of `Engine` types that are expected to support this expression
    (`tuple` [ `type` [ `Engine` ], ... ]), or `None` for no restriction.
    """

    def __post_init__(self) -> None:
        # A predicate function must be given at least one argument.
        if not self.args:
            raise RelationalAlgebraError(f"No arguments for predicate function {self.name}.")

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        result: set[ColumnTag] = set()
        for arg in self.args:
            result.update(arg.columns_required)
        return result

    def __str__(self) -> str:
        op_str = _OPERATOR_STRINGS.get(self.name)
        # Infix notation is only meaningful for a binary call.  A comparison
        # name with any other number of arguments falls through to call
        # notation instead of raising `IndexError` (one argument) or silently
        # dropping arguments (three or more).
        if op_str is not None and len(self.args) == 2:
            lhs, rhs = self.args
            return f"{lhs}{op_str}{rhs}"
        return f"{self.name}({', '.join(str(a) for a in self.args)})"

    def is_supported_by(self, engine: Engine) -> bool:
        # Docstring inherited.
        # The engine must match the type restriction (if any) and must also
        # support every nested argument expression.
        return (
            self.supporting_engine_types is None or isinstance(engine, self.supporting_engine_types)
        ) and all(arg.is_supported_by(engine) for arg in self.args)

    def as_trivial(self) -> None:
        # Docstring inherited.
        # Always `None` for this class.
        return None