Coverage for python/lsst/daf/relation/_relation.py: 87%

116 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-19 09:55 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "BaseRelation", 

26 "Relation", 

27) 

28 

29import dataclasses 

30from abc import abstractmethod 

31from collections.abc import Sequence, Set 

32from typing import TYPE_CHECKING, Any, Protocol, TypeVar 

33 

34from ._columns import ColumnTag 

35 

36if TYPE_CHECKING: 

37 from ._columns import ColumnExpression, Predicate 

38 from ._engine import Engine 

39 from ._operations import SortTerm 

40 

41 

42class Relation(Protocol): 

43 """An abstract interface for expression trees on tabular data. 

44 

45 Notes 

46 ----- 

47 This ABC is a `typing.Protocol`, which means that classes that implement 

48 its interface can be recognized as such by static type checkers without 

49 actually inheriting from it, and in fact all concrete relation types 

50 inherit only from `BaseRelation` (which provides implementations of many 

51 `Relation` methods, but does not include the complete interface or inherit 

52 from `Relation` itself) instead. This split allows subclasses to implement 

53 attributes that are defined as properties here as `~dataclasses.dataclass` 

54 attributes instead of true properties, something `typing.Protocol` 

55 explicitly permits and recommends that nevertheless works only if the 

56 protocol is not actually inherited from. 

57 

58 In almost all cases, users should use `Relation` instead of `BaseRelation`: 

59 the only exception is when writing an `isinstance` check to see if a type 

60 is a relation at all, rather than a particular relation subclass. 

61 `BaseRelation` may become an alias to `Relation` itself in the future if 

62 `typing.Protocol` inheritance interaction with properties is improved. 

63 

64 All concrete `Relation` types are frozen, equality-comparable 

65 `dataclasses`. They also provide a very concise `str` representation (in 

66 addition to the dataclass-provided `repr`) suitable for summarizing an 

67 entire relation tree. 

68 

69 See Also 

70 -------- 

71 :ref:`lsst.daf.relation-overview` 

72 """ 

73 

74 @property 

75 @abstractmethod 

76 def columns(self) -> Set[ColumnTag]: 

77 """The columns in this relation (`~collections.abc.Set` [ `ColumnTag` ] 

78 ). 

79 """ 

80 raise NotImplementedError() 

81 

82 @property 

83 @abstractmethod 

84 def payload(self) -> Any: 

85 """The engine-specific contents of the relation. 

86 

87 This is `None` in the common case that engine-specific contents are to 

88 be computed on-the-fly. Relation payloads permit "deferred 

89 initialization" - while relation objects are otherwise immutable, the 

90 payload may be set (once) after construction, via `attach_payload`. 

91 """ 

92 raise NotImplementedError() 

93 

94 @property 

95 @abstractmethod 

96 def engine(self) -> Engine: 

97 """The engine that is responsible for interpreting this relation 

98 (`Engine`). 

99 """ 

100 raise NotImplementedError() 

101 

102 @property 

103 @abstractmethod 

104 def min_rows(self) -> int: 

105 """The minimum number of rows this relation might have (`int`).""" 

106 raise NotImplementedError() 

107 

108 @property 

109 @abstractmethod 

110 def max_rows(self) -> int | None: 

111 """The maximum number of rows this relation might have (`int` or 

112 `None`). 

113 

114 This is `None` for relations whose size is not bounded from above. 

115 """ 

116 raise NotImplementedError() 

117 

118 @property 

119 @abstractmethod 

120 def is_locked(self) -> bool: 

121 """Whether this relation and those upstream of it should be considered 

122 fixed by tree-manipulation algorithms (`bool`). 

123 """ 

124 raise NotImplementedError() 

125 

126 @property 

127 @abstractmethod 

128 def is_join_identity(self) -> bool: 

129 """Whether a `join` to this relation will result in the other relation 

130 being returned directly (`bool`). 

131 

132 Join identity relations have exactly one row and no columns. 

133 

134 See Also 

135 -------- 

136 LeafRelation.make_join_identity 

137 """ 

138 raise NotImplementedError() 

139 

140 @property 

141 @abstractmethod 

142 def is_trivial(self) -> bool: 

143 """Whether this relation has no real content (`bool`). 

144 

145 A trivial relation is either a `join identity <is_join_identity>` with 

146 no columns and exactly one row, or a relation with an arbitrary number 

147 of columns and no rows (i.e. ``min_rows==max_rows==0``). 

148 """ 

149 raise NotImplementedError() 

150 

151 @abstractmethod 

152 def attach_payload(self, payload: Any) -> None: 

153 """Attach an engine-specific ``payload`` to this relation. 

154 

155 This method may be called exactly once on a `Relation` instance that 

156 was not initialized with a ``payload``, despite the fact that 

157 `Relation` objects are otherwise considered immutable. 

158 

159 Parameters 

160 ---------- 

161 payload 

162 Engine-specific content to attach. 

163 

164 Raises 

165 ------ 

166 TypeError 

167 Raised if this relation already has a payload, or can never have a 

168 payload. `TypeError` is used here for consistency with other 

169 attempts to assign to an attribute of an immutable object. 

170 """ 

171 raise NotImplementedError() 

172 

173 @abstractmethod 

174 def with_calculated_column( 

175 self, 

176 tag: ColumnTag, 

177 expression: ColumnExpression, 

178 *, 

179 preferred_engine: Engine | None = None, 

180 backtrack: bool = True, 

181 transfer: bool = False, 

182 require_preferred_engine: bool = False, 

183 ) -> Relation: 

184 """Return a new relation that adds a calculated column to this one. 

185 

186 This is a convenience method that constructs and applies a 

187 `Calculation` operation. 

188 

189 Parameters 

190 ---------- 

191 tag : `ColumnTag` 

192 Identifier for the new column. 

193 expression : `ColumnExpression` 

194 Expression used to populate the new column. 

195 preferred_engine : `Engine`, optional 

196 Engine that the operation would ideally be performed in. If this 

197 is not equal to ``self.engine``, the ``backtrack``, ``transfer``, 

198 and ``require_preferred_engine`` arguments control the behavior. 

199 backtrack : `bool`, optional 

200 If `True` (default) and the current engine is not the preferred 

201 engine, attempt to insert this calculation before a transfer 

202 upstream of the current relation, as long as this can be done 

203 without breaking up any locked relations or changing the resulting 

204 relation content. 

205 transfer : `bool`, optional 

206 If `True` (`False` is default) and the current engine is not the 

207 preferred engine, insert a new `Transfer` before the `Calculation`. 

208 If ``backtrack`` is also true, the transfer is added only if the 

209 backtrack attempt fails. 

210 require_preferred_engine : `bool`, optional 

211 If `True` (`False` is default) and the current engine is not the 

212 preferred engine, raise `EngineError`. If ``backtrack`` is also 

213 true, the exception is only raised if the backtrack attempt fails. 

214 Ignored if ``transfer`` is true. 

215 

216 Returns 

217 ------- 

218 relation : `Relation` 

219 Relation that contains the calculated column. 

220 

221 Raises 

222 ------ 

223 ColumnError 

224 Raised if the expression requires columns that are not present in 

225 ``self.columns``, or if ``tag`` is already present in 

226 ``self.columns``. 

227 EngineError 

228 Raised if ``require_preferred_engine=True`` and it was impossible 

229 to insert this operation in the preferred engine, or if the 

230 expression was not supported by the engine. 

231 """ 

232 raise NotImplementedError() 

233 

234 @abstractmethod 

235 def chain(self, rhs: Relation) -> Relation: 

236 """Return a new relation with all rows from this relation and another. 

237 

238 This is a convenience method that constructs and applies a `Chain` 

239 operation. 

240 

241 Parameters 

242 ---------- 

243 rhs : `Relation` 

244 Other relation to chain to ``self``. Must have the same columns 

245 and engine as ``self``. 

246 

247 Returns 

248 ------- 

249 relation : `Relation` 

250 New relation with all rows from both relations. This method never 

251 returns an operand directly, even if the other has ``max_rows==0``, 

252 as it is assumed that even relations with no rows are useful to 

253 preserve in the tree for `diagnostics <Diagnostics>`. 

254 

255 Raises 

256 ------ 

257 ColumnError 

258 Raised if the two relations do not have the same columns. 

259 EngineError 

260 Raised if the two relations do not have the same engine. 

261 """ 

262 raise NotImplementedError() 

263 

264 @abstractmethod 

265 def without_duplicates( 

266 self, 

267 *, 

268 preferred_engine: Engine | None = None, 

269 backtrack: bool = True, 

270 transfer: bool = False, 

271 require_preferred_engine: bool = False, 

272 ) -> Relation: 

273 """Return a new relation that removes any duplicate rows from this one. 

274 

275 This is a convenience method that constructs and applies a 

276 `Deduplication` operation. 

277 

278 Parameters 

279 ---------- 

280 preferred_engine : `Engine`, optional 

281 Engine that the operation would ideally be performed in. If this 

282 is not equal to ``self.engine``, the ``backtrack``, ``transfer``, 

283 and ``require_preferred_engine`` arguments control the behavior. 

284 backtrack : `bool`, optional 

285 If `True` (default) and the current engine is not the preferred 

286 engine, attempt to insert this deduplication before a transfer 

287 upstream of the current relation, as long as this can be done 

288 without breaking up any locked relations or changing the resulting 

289 relation content. 

290 transfer : `bool`, optional 

291 If `True` (`False` is default) and the current engine is not the 

292 preferred engine, insert a new `Transfer` before the 

293 `Deduplication`. If ``backtrack`` is also true, the transfer is 

294 added only if the backtrack attempt fails. 

295 require_preferred_engine : `bool`, optional 

296 If `True` (`False` is default) and the current engine is not the 

297 preferred engine, raise `EngineError`. If ``backtrack`` is also 

298 true, the exception is only raised if the backtrack attempt fails. 

299 Ignored if ``transfer`` is true. 

300 

301 Returns 

302 ------- 

303 relation : `Relation` 

304 Relation with no duplicate rows. This may be ``self`` if it can be 

305 determined that there is no duplication already, but this is not 

306 guaranteed. 

307 

308 Raises 

309 ------ 

310 EngineError 

311 Raised if ``require_preferred_engine=True`` and it was impossible 

312 to insert this operation in the preferred engine. 

313 """ 

314 raise NotImplementedError() 

315 

316 @abstractmethod 

317 def join( 

318 self, 

319 rhs: Relation, 

320 predicate: Predicate | None = None, 

321 *, 

322 backtrack: bool = True, 

323 transfer: bool = False, 

324 ) -> Relation: 

325 """Return a new relation that joins this one to the given one. 

326 

327 This is a convenience method that constructs and applies a `Join` 

328 operation, via `PartialJoin.apply`. 

329 

330 Parameters 

331 ---------- 

332 rhs : `Relation` 

333 Relation to join to ``self``. 

334 predicate : `Predicate`, optional 

335 Boolean expression that must evaluate to true in order to join a 

336 pair of rows, in addition to an implicit equality constraint on any 

337 columns in both relations. 

338 backtrack : `bool`, optional 

339 If `True` (default) and ``self.engine != rhs.engine``, attempt to 

340 insert this join before a transfer upstream of ``self``, as long as 

341 this can be done without breaking up any locked relations or 

342 changing the resulting relation content. 

343 transfer : `bool`, optional 

344 If `True` (`False` is default) and ``self.engine != rhs.engine``, 

345 insert a new `Transfer` before the `Join`. If ``backtrack`` is 

346 also true, the transfer is added only if the backtrack attempt 

347 fails. 

348 

349 Returns 

350 ------- 

351 relation : `Relation` 

352 New relation that joins ``self`` to ``rhs``. May be ``self`` or 

353 ``rhs`` if the other is a `join identity <is_join_identity>`. 

354 

355 Raises 

356 ------ 

357 ColumnError 

358 Raised if the given predicate requires columns not present in 

359 ``self`` or ``rhs``. 

360 EngineError 

361 Raised if it was impossible to insert this operation in 

362 ``rhs.engine`` via backtracks or transfers on ``self``, or if the 

363 predicate was not supported by the engine. 

364 

365 Notes 

366 ----- 

367 This method does not treat ``self`` and ``rhs`` symmetrically: it 

368 always considers ``rhs`` fixed, and only backtracks into or considers 

369 applying transfers to ``self``. 

370 """ 

371 raise NotImplementedError() 

372 

373 @abstractmethod 

374 def materialized( 

375 self, 

376 name: str | None = None, 

377 *, 

378 name_prefix: str = "materialization", 

379 ) -> Relation: 

380 """Return a new relation that indicates that this relation's 

381 payload should be cached after it is first processed. 

382 

383 This is a convenience method that constructs and applies a 

384 `Materialization` operation. 

385 

386 Parameters 

387 ---------- 

388 name : `str`, optional 

389 Name to use for the cached payload within the engine (e.g. the name 

390 for a temporary table in SQL). If not provided, a name will be 

391 created via a call to `Engine.get_relation_name`. 

392 name_prefix : `str`, optional 

393 Prefix to pass to `Engine.get_relation_name`; ignored if ``name`` 

394 is provided. Unlike 

395 most operations, `Materialization` relations are locked by default, 

396 since they reflect user intent to mark a specific tree as 

397 cacheable. 

398 

399 Returns 

400 ------- 

401 relation : `Relation` 

402 New relation that marks its upstream tree for caching. May be 

403 ``self`` if it is already a `LeafRelation` or another 

404 materialization (in which case the given name or name prefix will 

405 be ignored). 

406 

407 See Also 

408 -------- 

409 Processor.materialize 

410 """ 

411 raise NotImplementedError() 

412 

413 @abstractmethod 

414 def with_only_columns( 

415 self, 

416 columns: Set[ColumnTag], 

417 *, 

418 preferred_engine: Engine | None = None, 

419 backtrack: bool = True, 

420 transfer: bool = False, 

421 require_preferred_engine: bool = False, 

422 ) -> Relation: 

423 """Return a new relation whose columns are a subset of this relation's. 

424 

425 This is a convenience method that constructs and applies a `Projection` 

426 operation. 

427 

428 Parameters 

429 ---------- 

430 columns : `~collections.abc.Set` [ `ColumnTag` ] 

431 Columns to be propagated to the new relation; must be a subset of 

432 ``self.columns``. 

433 preferred_engine : `Engine`, optional 

434 Engine that the operation would ideally be performed in. If this 

435 is not equal to ``self.engine``, the ``backtrack``, ``transfer``, 

436 and ``require_preferred_engine`` arguments control the behavior. 

437 backtrack : `bool`, optional 

438 If `True` (default) and the current engine is not the preferred 

439 engine, attempt to insert this projection before a transfer 

440 upstream of the current relation, as long as this can be done 

441 without breaking up any locked relations or changing the resulting 

442 relation content. 

443 transfer : `bool`, optional 

444 If `True` (`False` is default) and the current engine is not the 

445 preferred engine, insert a new `Transfer` before the 

446 `Projection`. If ``backtrack`` is also true, the transfer is 

447 added only if the backtrack attempt fails. 

448 require_preferred_engine : `bool`, optional 

449 If `True` (`False` is default) and the current engine is not the 

450 preferred engine, raise `EngineError`. If ``backtrack`` is also 

451 true, the exception is only raised if the backtrack attempt fails. 

452 Ignored if ``transfer`` is true. 

453 

454 Returns 

455 ------- 

456 relation : `Relation` 

457 New relation with only the given columns. Will be ``self`` if 

458 ``columns == self.columns``. 

459 

460 Raises 

461 ------ 

462 ColumnError 

463 Raised if ``columns`` is not a subset of ``self.columns``. 

464 EngineError 

465 Raised if ``require_preferred_engine=True`` and it was impossible 

466 to insert this operation in the preferred engine. 

467 """ 

468 raise NotImplementedError() 

469 

470 @abstractmethod 

471 def with_rows_satisfying( 

472 self, 

473 predicate: Predicate, 

474 *, 

475 preferred_engine: Engine | None = None, 

476 backtrack: bool = True, 

477 transfer: bool = False, 

478 require_preferred_engine: bool = False, 

479 ) -> Relation: 

480 """Return a new relation that filters out rows via a boolean 

481 expression. 

482 

483 This is a convenience method that constructs and applies a 

484 `Selection` operation. 

485 

486 Parameters 

487 ---------- 

488 predicate : `Predicate` 

489 Boolean expression that evaluates to `True` for rows that should 

490 be included and `False` for rows that should be filtered out. 

491 preferred_engine : `Engine`, optional 

492 Engine that the operation would ideally be performed in. If this 

493 is not equal to ``self.engine``, the ``backtrack``, ``transfer``, 

494 and ``require_preferred_engine`` arguments control the behavior. 

495 backtrack : `bool`, optional 

496 If `True` (default) and the current engine is not the preferred 

497 engine, attempt to insert this selection before a transfer 

498 upstream of the current relation, as long as this can be done 

499 without breaking up any locked relations or changing the resulting 

500 relation content. 

501 transfer : `bool`, optional 

502 If `True` (`False` is default) and the current engine is not the 

503 preferred engine, insert a new `Transfer` before the 

504 `Selection`. If ``backtrack`` is also true, the transfer is 

505 added only if the backtrack attempt fails. 

506 require_preferred_engine : `bool`, optional 

507 If `True` (`False` is default) and the current engine is not the 

508 preferred engine, raise `EngineError`. If ``backtrack`` is also 

509 true, the exception is only raised if the backtrack attempt fails. 

510 Ignored if ``transfer`` is true. 

511 

512 Returns 

513 ------- 

514 relation : `Relation` 

515 New relation with only the rows that satisfy the given predicate. 

516 May be ``self`` if the predicate is 

517 `trivially True <Predicate.as_trivial>`. 

518 

519 Raises 

520 ------ 

521 ColumnError 

522 Raised if ``predicate.columns_required`` is not a subset of 

523 ``self.columns``. 

524 EngineError 

525 Raised if ``require_preferred_engine=True`` and it was impossible 

526 to insert this operation in the preferred engine, or if the 

527 expression was not supported by the engine. 

528 """ 

529 raise NotImplementedError() 

530 

531 @abstractmethod 

532 def __getitem__(self, key: slice) -> Relation: 

533 """Return a new relation whose rows are a slice of ``self``. 

534 

535 This is a convenience method that constructs and applies a `Slice` 

536 operation. 

537 

538 Parameters 

539 ---------- 

540 key : `slice` 

541 Start and stop for the slice. Non-unit step values are not 

542 supported. 

543 

544 Returns 

545 ------- 

546 relation : `Relation` 

547 New relation with only the rows between the given start and stop 

548 indices. May be ``self`` if ``start=0`` and ``stop=None``. If 

549 ``self`` is already a slice operation relation, the operations will 

550 be merged. 

551 

552 Raises 

553 ------ 

554 TypeError 

555 Raised if ``slice.step`` is a value other than ``1`` or ``None``. 

556 """ 

557 raise NotImplementedError() 

558 

559 @abstractmethod 

560 def sorted( 

561 self, 

562 terms: Sequence[SortTerm], 

563 *, 

564 preferred_engine: Engine | None = None, 

565 backtrack: bool = True, 

566 transfer: bool = False, 

567 require_preferred_engine: bool = False, 

568 ) -> Relation: 

569 """Return a new relation that sorts rows according to a sequence of 

570 column expressions. 

571 

572 This is a convenience method that constructs and applies a `Sort` 

573 operation. 

574 

575 Parameters 

576 ---------- 

577 terms : `~collections.abc.Sequence` [ `SortTerm` ] 

578 Ordered sequence of column expressions to sort on, with whether to 

579 apply them in ascending or descending order. 

580 preferred_engine : `Engine`, optional 

581 Engine that the operation would ideally be performed in. If this 

582 is not equal to ``self.engine``, the ``backtrack``, ``transfer``, 

583 and ``require_preferred_engine`` arguments control the behavior. 

584 backtrack : `bool`, optional 

585 If `True` (default) and the current engine is not the preferred 

586 engine, attempt to insert this sort before a transfer upstream of 

587 the current relation, as long as this can be done without breaking 

588 up any locked relations or changing the resulting relation content. 

589 transfer : `bool`, optional 

590 If `True` (`False` is default) and the current engine is not the 

591 preferred engine, insert a new `Transfer` before the `Sort`. If 

592 ``backtrack`` is also true, the transfer is added only if the 

593 backtrack attempt fails. 

594 require_preferred_engine : `bool`, optional 

595 If `True` (`False` is default) and the current engine is not the 

596 preferred engine, raise `EngineError`. If ``backtrack`` is also 

597 true, the exception is only raised if the backtrack attempt fails. 

598 Ignored if ``transfer`` is true. 

599 

600 Returns 

601 ------- 

602 relation : `Relation` 

603 New relation with sorted rows. Will be ``self`` if ``terms`` is 

604 empty. If ``self`` is already a sort operation relation, the 

605 operations will be merged by concatenating their terms, which may 

606 result in duplicate sort terms that have no effect. 

607 

608 Raises 

609 ------ 

610 ColumnError 

611 Raised if any column required by a `SortTerm` is not present in 

612 ``self.columns``. 

613 EngineError 

614 Raised if ``require_preferred_engine=True`` and it was impossible 

615 to insert this operation in the preferred engine, or if a 

616 `SortTerm` expression was not supported by the engine. 

617 """ 

618 raise NotImplementedError() 

619 

620 @abstractmethod 

621 def transferred_to(self, destination: Engine) -> Relation: 

622 """Return a new relation that transfers this relation to a new engine. 

623 

624 This is a convenience method that constructs and applies a `Transfer` 

625 operation. 

626 

627 Parameters 

628 ---------- 

629 destination : `Engine` 

630 Engine for the new relation. 

631 

632 Returns 

633 ------- 

634 relation : `Relation` 

635 New relation in the given engine. Will be ``self`` if 

636 ``self.engine == destination``. 

637 """ 

638 raise NotImplementedError() 

639 

640 

_M = TypeVar("_M", bound=Any)


def _copy_relation_docs(method: _M) -> _M:
    """Decorator that gives ``method`` the docstring of the `Relation`
    attribute with the same name.

    `Relation` is the documented public interface, but concrete relation
    classes inherit from `BaseRelation` rather than from `Relation`, so the
    documentation has to be attached to the `BaseRelation` implementations
    for it to be inherited by those classes.

    Parameters
    ----------
    method
        Callable whose ``__name__`` matches an attribute of `Relation`; its
        ``__doc__`` is overwritten in place.

    Returns
    -------
    method
        The same object that was passed in, to allow use as a decorator.
    """  # noqa: D401
    # Look the documented counterpart up by name; for properties this is the
    # property object, whose ``__doc__`` is the getter's docstring.
    documented = getattr(Relation, method.__name__)
    method.__doc__ = documented.__doc__
    return method

655 

656 

@dataclasses.dataclass(frozen=True)
class BaseRelation:
    """An implementation-focused target class for concrete `Relation` objects.

    This class provides method implementations for much of the `Relation`
    interface and is actually inherited from (unlike `Relation` itself) by all
    concrete relations.  It should only be used outside of the
    ``lsst.daf.relation`` package when needed for `isinstance` checks.

    Notes
    -----
    Method docstrings are not written here; they are copied from the
    same-named `Relation` members by the ``_copy_relation_docs`` decorator.
    Methods annotate ``self`` as `Relation` so that they can use the parts of
    the interface (``columns``, ``engine``, ``min_rows``, ``max_rows``) that
    this class does not define itself.
    """

    def __init_subclass__(cls) -> None:
        # Only the four predefined relation types may inherit from this class
        # directly; anything else must derive from one of them.  The check
        # looks for `BaseRelation` among the direct bases: comparing the base
        # name against "Relation" (the protocol, which is never inherited
        # from) could not fail and left the restriction unenforced.
        assert (
            cls.__name__
            in {
                "LeafRelation",
                "UnaryOperationRelation",
                "BinaryOperationRelation",
                "MarkerRelation",
            }
            or BaseRelation not in cls.__bases__
        ), "Relation inheritance is closed to predefined types in daf_relation and MarkerRelation subclasses."

    @property
    @_copy_relation_docs
    def is_join_identity(self: Relation) -> bool:
        # No columns and exactly one row (both bounds equal to one).
        return not self.columns and self.max_rows == 1 and self.min_rows == 1

    @property
    @_copy_relation_docs
    def is_trivial(self: Relation) -> bool:
        # Either a join identity or a relation that can never have rows.
        return self.is_join_identity or self.max_rows == 0

    @_copy_relation_docs
    def attach_payload(self: Relation, payload: Any) -> None:
        # Default implementation: this relation never accepts a payload.
        raise TypeError(f"Cannot attach payload {payload} to relation {self}.")

    @_copy_relation_docs
    def with_calculated_column(
        self: Relation,
        tag: ColumnTag,
        expression: ColumnExpression,
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        # Imported here rather than at module scope, as in the other
        # operation-backed methods below.
        from ._operations import Calculation

        return Calculation(tag, expression).apply(
            self,
            preferred_engine=preferred_engine,
            backtrack=backtrack,
            transfer=transfer,
            require_preferred_engine=require_preferred_engine,
        )

    @_copy_relation_docs
    def chain(self: Relation, rhs: Relation) -> Relation:
        from ._operations import Chain

        return Chain().apply(self, rhs)

    @_copy_relation_docs
    def without_duplicates(
        self: Relation,
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        from ._operations import Deduplication

        return Deduplication().apply(
            self,
            preferred_engine=preferred_engine,
            backtrack=backtrack,
            transfer=transfer,
            require_preferred_engine=require_preferred_engine,
        )

    @_copy_relation_docs
    def join(
        self: Relation,
        rhs: Relation,
        predicate: Predicate | None = None,
        *,
        backtrack: bool = True,
        transfer: bool = False,
    ) -> Relation:
        from ._columns import Predicate
        from ._operations import Join

        # A missing predicate is replaced by a literal-true predicate.
        return (
            Join(predicate if predicate is not None else Predicate.literal(True))
            .partial(rhs)
            .apply(self, backtrack=backtrack, transfer=transfer)
        )

    @_copy_relation_docs
    def materialized(
        self: Relation,
        name: str | None = None,
        *,
        name_prefix: str = "materialization",
    ) -> Relation:
        # Delegated entirely to this relation's engine.
        return self.engine.materialize(self, name, name_prefix)

    @_copy_relation_docs
    def with_only_columns(
        self: Relation,
        columns: Set[ColumnTag],
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        from ._operations import Projection

        # The given set is copied into a frozenset before being handed to
        # the operation.
        return Projection(frozenset(columns)).apply(
            self,
            preferred_engine=preferred_engine,
            backtrack=backtrack,
            transfer=transfer,
            require_preferred_engine=require_preferred_engine,
        )

    @_copy_relation_docs
    def with_rows_satisfying(
        self: Relation,
        predicate: Predicate,
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        from ._operations import Selection

        return Selection(predicate).apply(
            self,
            preferred_engine=preferred_engine,
            backtrack=backtrack,
            transfer=transfer,
            require_preferred_engine=require_preferred_engine,
        )

    @_copy_relation_docs
    def __getitem__(self: Relation, key: slice) -> Relation:
        from ._operations import Slice

        # Only ``relation[start:stop]`` is supported: no integer indexing and
        # no step other than 1 / None.
        if not isinstance(key, slice):
            raise TypeError("Only slices are supported in relation indexing.")
        if key.step not in (1, None):
            raise TypeError("Slices with non-unit step are not supported.")
        # An omitted start is normalized to 0; ``stop`` is passed through
        # unchanged (it may be `None`).
        return Slice(key.start if key.start is not None else 0, key.stop).apply(self)

    @_copy_relation_docs
    def sorted(
        self: Relation,
        terms: Sequence[SortTerm],
        *,
        preferred_engine: Engine | None = None,
        backtrack: bool = True,
        transfer: bool = False,
        require_preferred_engine: bool = False,
    ) -> Relation:
        from ._operations import Sort

        # The terms are copied into a tuple before being handed to the
        # operation.
        return Sort(tuple(terms)).apply(
            self,
            preferred_engine=preferred_engine,
            backtrack=backtrack,
            transfer=transfer,
            require_preferred_engine=require_preferred_engine,
        )

    @_copy_relation_docs
    def transferred_to(self: Relation, destination: Engine) -> Relation:
        # Delegated entirely to the destination engine.
        return destination.transfer(self)