Coverage for python/lsst/daf/relation/_unary_operation.py: 51%

146 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-13 10:31 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("UnaryOperation", "RowFilter", "Reordering", "Identity", "UnaryCommutator") 

25 

26import dataclasses 

27from abc import ABC, abstractmethod 

28from collections.abc import Set 

29from typing import TYPE_CHECKING, Literal, final 

30 

31from ._columns import ColumnTag 

32from ._exceptions import EngineError 

33from ._relation import Relation 

34 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true

36 from ._engine import Engine 

37 from ._operation_relations import UnaryOperationRelation 

38 

39 

40class UnaryOperation(ABC): 

41 """An abstract base class for operations that act on a single relation. 

42 

43 Notes 

44 ----- 

45 A `UnaryOperation` represents the operation itself; the combination of an 

46 operation and the "target" relation it acts on to form a new relation is 

47 represented by the `UnaryOperationRelation` class. That combination should 

48 always be created via a call to the `apply` method (or something that calls 

49 it, like the convenience methods on the `Relation` class). In some cases, 

50 applying a `UnaryOperation` doesn't return something involving the original 

51 operation, because of some combination of defaulted-parameter population 

52 and simplification, and there are even some `UnaryOperation` classes that 

53 should never actually appear in a `UnaryOperationRelation`. 

54 

55 `UnaryOperation` cannot be subclassed directly by external code, but it has 

56 two more restricted subclasses that can be:`RowFilter` and `Reordering`. 

57 

58 All concrete `UnaryOperation` types are frozen, equality-comparable 

59 `dataclasses`. They also provide a very concise `str` representation (in 

60 addition to the dataclass-provided `repr`) suitable for summarizing an 

61 entire relation tree. 

62 

63 See Also 

64 -------- 

65 :ref:`lsst.daf.relation-overview-operations` 

66 """ 

67 

68 def __init_subclass__(cls) -> None: 

69 assert ( 

70 cls.__name__ 

71 in { 

72 "Calculation", 

73 "Deduplication", 

74 "Identity", 

75 "PartialJoin", 

76 "Projection", 

77 "RowFilter", 

78 "Reordering", 

79 } 

80 or cls.__base__ is not UnaryOperation 

81 ), ( 

82 "UnaryOperation inheritance is closed to predefined types in daf_relation, " 

83 "except for subclasses of RowFilter and Reordering." 

84 ) 

85 

86 @property 

87 def columns_required(self) -> Set[ColumnTag]: 

88 """The columns the target relation must have in order for this 

89 operation to be applied to it (`~collections.abc.Set` [ `ColumnTag` ] 

90 ). 

91 """ 

92 return frozenset() 

93 

94 @abstractmethod 

95 def __str__(self) -> str: 

96 raise NotImplementedError() 

97 

98 @property 

99 @abstractmethod 

100 def is_empty_invariant(self) -> bool: 

101 """Whether this operation can remove all rows from its target relation 

102 (`bool`). 

103 """ 

104 raise NotImplementedError() 

105 

106 @property 

107 @abstractmethod 

108 def is_count_invariant(self) -> bool: 

109 """Whether this operation can change the number of rows in its target 

110 relation (`bool`). 

111 

112 The number of rows here includes duplicates - removing duplicates is 

113 not considered a count-invariant operation. 

114 """ 

115 raise NotImplementedError() 

116 

117 @property 

118 def is_order_dependent(self) -> bool: 

119 """Whether this operation depends on the order of the rows in its 

120 target relation (`bool`). 

121 """ 

122 return False 

123 

124 @property 

125 def is_count_dependent(self) -> bool: 

126 """Whether this operation depends on the number of rows in its target 

127 relation (`bool`). 

128 """ 

129 return False 

130 

131 def is_supported_by(self, engine: Engine) -> bool: 

132 """Whether this operation is supported by the given engine (`bool`).""" 

133 return True 

134 

135 @final 

136 def apply( 

137 self, 

138 target: Relation, 

139 *, 

140 preferred_engine: Engine | None = None, 

141 backtrack: bool = True, 

142 transfer: bool = False, 

143 require_preferred_engine: bool = False, 

144 ) -> Relation: 

145 """Create a new relation that represents the action of this operation 

146 on an existing relation. 

147 

148 Parameters 

149 ---------- 

150 target : `Relation` 

151 Relation the operation will act on. 

152 preferred_engine : `Engine`, optional 

153 Engine that the operation would ideally be performed in. If this 

154 is not equal to ``target.engine``, the ``backtrack``, ``transfer``, 

155 and ``require_preferred_engine`` arguments control the behavior. 

156 Some operations may supply their own preferred engine default, such 

157 as the "fixed" operand's own engine in a `PartialJoin`. 

158 backtrack : `bool`, optional 

159 If `True` (default) and the current engine is not the preferred 

160 engine, attempt to insert this operation before a transfer upstream 

161 of the current relation, as long as this can be done without 

162 breaking up any locked relations or changing the resulting relation 

163 content. 

164 transfer : `bool`, optional 

165 If `True` (`False` is default) and the current engine is not the 

166 preferred engine, insert a new `Transfer` to the preferred engine 

167 before this operation. If ``backtrack`` is also true, the transfer 

168 is added only if the backtrack attempt fails. 

169 require_preferred_engine : `bool`, optional 

170 If `True` (`False` is default) and the current engine is not the 

171 preferred engine, raise `EngineError`. If ``backtrack`` is also 

172 true, the exception is only raised if the backtrack attempt fails. 

173 Ignored if ``transfer`` is true. 

174 

175 Returns 

176 ------- 

177 new_relation : `Relation` 

178 Relation that includes this operation. This may be ``target`` if 

179 the operation is a no-op, and it may not be a 

180 `UnaryOperationRelation` holding this operation (or even a similar 

181 one) if the operation was inserted earlier in the tree via 

182 commutation relations or if simplification occurred. 

183 

184 Raises 

185 ------ 

186 ColumnError 

187 Raised if the operation could not be applied due to problems with 

188 the target relation's columns. 

189 EngineError 

190 Raised if the operation could not be applied due to problems with 

191 the target relation's engine. 

192 

193 Notes 

194 ----- 

195 Adding operations to relation trees is a potentially complex process in 

196 order to give both the operation type and the engine to customize the 

197 opportunity to enforce their own invariants. This `~typing.final` 

198 method provides the bulk of the high-level implementation, and is 

199 called by the `Relation` class's convenience methods with essentially 

200 no additional logic. The overall sequence is as follows: 

201 

202 - `apply` starts by delegating to `_begin_apply`, which 

203 allows operation classes to replace the operation object itself, 

204 perform initial checks, and set the preferred engine. 

205 - `apply` then performs the ``preferred_engine`` logic indicated by the 

206 ``backtrack`` ``transfer``, and ``require_preferred_engine`` options, 

207 delegating backtracking to `Engine.backtrack_unary`. 

208 - `Engine.backtrack_unary` will typically call back to `commute` to 

209 determine how and whether to move the new operation upstream of 

210 existing ones. 

211 - If backtracking is not used or is not fully successful, `apply` then 

212 delegates to `Engine.append_unary` to add the operation to the root 

213 of the relation tree. 

214 - The `Engine` methods are expected to delegate back to 

215 `_finish_apply` when they have identified the location in the tree 

216 where the new operation should be inserted. 

217 - `_finish_apply` is responsible for actually constructing the 

218 `UnaryOperationRelation` when appropriate. The default 

219 implementation of `_finish_apply` also calls `simplify` to see if it 

220 is possible to merge the new operation with those immediately 

221 upstream of it or elide it entirely. 

222 """ 

223 operation, preferred_engine = self._begin_apply(target, preferred_engine) 

224 done = False 

225 result = target 

226 if preferred_engine != target.engine: 

227 if backtrack: 

228 result, done, messages = target.engine.backtrack_unary(operation, target, preferred_engine) 

229 else: 

230 messages = ("backtracking insertion not requested by caller",) 

231 if not done: 

232 if transfer: 

233 result = result.transferred_to(preferred_engine) 

234 elif require_preferred_engine: 

235 raise EngineError( 

236 f"No way to apply {operation} to {target} " 

237 f"with required engine '{preferred_engine}': {'; '.join(messages)}." 

238 ) 

239 if not done: 

240 result = result.engine.append_unary(operation, result) 

241 return result 

242 

243 def _begin_apply( 

244 self, target: Relation, preferred_engine: Engine | None 

245 ) -> tuple[UnaryOperation, Engine]: 

246 """A customization hook for the beginning of operation application. 

247 

248 Parameter 

249 --------- 

250 target : `Relation` 

251 Relation the operation should act on, at least conceptually. Later 

252 logic may actually apply the operation upstream of this relation, 

253 but only when the result of doing so would be equivalent to 

254 applying it here. 

255 preferred_engine : `Engine` or `None` 

256 Preferred engine passed to `apply`. 

257 

258 Returns 

259 ------- 

260 operation : `UnaryOperation` 

261 The operation to actually apply. The default implementation 

262 returns ``self``. 

263 preferred_engine : `Engine` 

264 The engine to actually prefer. The default implementation returns 

265 the given ``preferred_engine`` if it is not `None`, and 

266 ``target.engine`` if it is `None`. 

267 

268 Notes 

269 ----- 

270 This method provides an opportunity for operations to establish any 

271 invariants that must be satisfied only when the operation is part of 

272 a relation. Implementations can also return an `Identity` instance 

273 when they can determine that the operation will do nothing when applied 

274 to the given target. 

275 """ 

276 return self, preferred_engine if preferred_engine is not None else target.engine 

277 

278 def _finish_apply(self, target: Relation) -> Relation: 

279 """A customization hook for the end of operation application. 

280 

281 Parameters 

282 ---------- 

283 target : `Relation` 

284 Relation the operation will act upon directly. 

285 

286 Returns 

287 ------- 

288 applied : `Relation` 

289 Result of applying this operation to the given target. Usually - 

290 but not always - a `UnaryOperationRelation` that holds ``self`` and 

291 ``target``. 

292 

293 Notes 

294 ----- 

295 This method provides an opportunity for operations to perform final 

296 simplification at the point of insertion (which the default 

297 implementation does, via calls to `simplify`) and change the kind of 

298 relation produced (the default implementation constructs a 

299 `UnaryOperationRelation`). 

300 """ 

301 from ._operation_relations import UnaryOperationRelation 

302 

303 match target: 

304 case UnaryOperationRelation(): 

305 if simplified := self.simplify(target.operation): 

306 if simplified is target.operation: 

307 return target 

308 else: 

309 return simplified._finish_apply(target.target) 

310 

311 if not self.is_supported_by(target.engine): 

312 raise EngineError(f"Operation {self} is not supported by engine {target.engine}.") 

313 

314 return UnaryOperationRelation( 

315 operation=self, 

316 target=target, 

317 columns=self.applied_columns(target), 

318 ) 

319 

320 def applied_columns(self, target: Relation) -> Set[ColumnTag]: 

321 """Return the columns of the relation that results from applying this 

322 operation to the given target. 

323 

324 Parameters 

325 ---------- 

326 target : `Relation` 

327 Relation the operation will act on. 

328 

329 Returns 

330 ------- 

331 columns : `~collections.abc.Set` [ `ColumnTag` ] 

332 Columns the new relation would have. 

333 """ 

334 return target.columns 

335 

336 @abstractmethod 

337 def applied_min_rows(self, target: Relation) -> int: 

338 """Return the minimum number of rows of the relation that results from 

339 applying this operation to the given target. 

340 

341 Parameters 

342 ---------- 

343 target : `Relation` 

344 Relation the operation will act on. 

345 

346 Returns 

347 ------- 

348 min_rows : `int` 

349 Minimum number of rows the new relation would have. 

350 """ 

351 raise NotImplementedError() 

352 

353 def applied_max_rows(self, target: Relation) -> int | None: 

354 """Return the maximum number of rows of the relation that results from 

355 applying this operation to the given target. 

356 

357 Parameters 

358 ---------- 

359 target : `Relation` 

360 Relation the operation will act on. 

361 

362 Returns 

363 ------- 

364 max_rows : `int` or `None` 

365 Maximum number of rows the new relation would have. 

366 """ 

367 return target.max_rows 

368 

369 def commute(self, current: UnaryOperationRelation) -> UnaryCommutator: 

370 """Describe whether and how this operation can be moved upstream of an 

371 existing one without changing the content of the resulting relation. 

372 

373 Parameters 

374 ---------- 

375 current : `UnaryOperationRelation` 

376 A unary operation relation that is the current logical target of 

377 ``self``. 

378 

379 Returns 

380 ------- 

381 commutator : `UnaryCommutator` 

382 A struct that either provides a version of ``current.operation`` 

383 that can be applied to ``current.target`` after a possibly-modified 

384 version of ``self``, or an explanation of why this is impossible. 

385 

386 Notes 

387 ----- 

388 The `commute` implementations for the provided concrete 

389 `UnaryOperation` types assume that all unary operations preserve row 

390 order. If this is not the case in an engine, that engine should not 

391 implement `Engine.backtrack_unary` or take this into account itself 

392 when determining whether operations commute. 

393 """ 

394 return UnaryCommutator( 

395 first=None, 

396 second=current.operation, 

397 done=False, 

398 messages=(f"{self} does not commute with anything",), 

399 ) 

400 

401 def simplify(self, upstream: UnaryOperation) -> UnaryOperation | None: 

402 """Return a simplified combination of this operation with another. 

403 

404 Parameters 

405 ---------- 

406 upstream : `UnaryOperation` 

407 Operation that acts immediately prior to ``self``. 

408 

409 Returns 

410 ------- 

411 simplified : `UnaryOperation` 

412 Operation that combines the action of ``upstream`` followed by 

413 ``self``, or `None` if no such combination is possible. 

414 """ 

415 return None 

416 

417 

class RowFilter(UnaryOperation):
    """An extensible `UnaryOperation` subclass for operations that only remove
    rows from their target.

    Notes
    -----
    Subclasses must still implement `__str__`, `is_empty_invariant`, and
    `is_order_dependent`; the column set and count-invariance are fixed here.
    """

    @final
    @property
    def is_count_invariant(self) -> Literal[False]:
        # Docstring inherited.
        # A property getter only ever receives ``self``, so it takes no other
        # parameters.
        return False

    @property
    @abstractmethod
    def is_order_dependent(self) -> bool:
        # Docstring inherited.
        # Re-declared as abstract so every row filter must state explicitly
        # whether it depends on row order.
        raise NotImplementedError()

    @final
    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        if target.min_rows == 0 or self.is_count_invariant:
            # Nothing can be said beyond the target's own lower bound.
            return target.min_rows
        elif self.is_empty_invariant:
            # The target has at least one row and this filter is declared
            # empty-invariant, so at least one row is reported as remaining.
            return 1
        else:
            # The filter may remove every row.
            return 0

448 

449 

@final
@dataclasses.dataclass(frozen=True)
class Identity(UnaryOperation):
    """A concrete unary operation that does nothing.

    `Identity` operations never appear in relation trees; their `apply` method
    always just returns the target relation.

    Notes
    -----
    This is a frozen dataclass with no fields, so all instances compare equal
    and hash identically.
    """

    def __str__(self) -> str:
        return "identity"

    def _finish_apply(self, target: Relation) -> Relation:
        # Docstring inherited.
        # Applying the identity never wraps the target in a new relation.
        return target

    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    def applied_max_rows(self, target: Relation) -> int | None:
        # Docstring inherited.
        return target.max_rows

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        # Reports full commutation (``done`` keeps its default of `True`),
        # leaving the existing operation unchanged.
        return UnaryCommutator(first=self, second=current.operation)

    def simplify(self, current: UnaryOperation) -> UnaryOperation:
        # Docstring inherited.
        # The given operation followed by the identity is just that operation.
        return current

492 

493 

class Reordering(UnaryOperation):
    """An extensible `UnaryOperation` subclass for operations that only reorder
    rows.

    Notes
    -----
    Every member defined here is `~typing.final`: the columns and row-count
    bounds are reported exactly as the target's, and both invariance flags
    are `True`.
    """

    @final
    def applied_columns(self, target: Relation) -> Set[ColumnTag]:
        # Docstring inherited.
        return target.columns

    @final
    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        return target.min_rows

    @final
    def applied_max_rows(self, target: Relation) -> int | None:
        # Docstring inherited.
        return target.max_rows

    @final
    @property
    def is_empty_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

    @final
    @property
    def is_count_invariant(self) -> Literal[True]:
        # Docstring inherited.
        return True

525 

526 

@dataclasses.dataclass
class UnaryCommutator:
    """A struct for the return value of `UnaryOperation.commute`."""

    first: UnaryOperation | None
    """The first operation to apply in the commuted sequence (`UnaryOperation`
    or `None`).

    When at least some commutation is possible, this is a possibly-modified
    version of ``self`` in the call to `UnaryOperation.commute`, i.e. the
    operation being moved upstream.  When it is `None`, either the commutation
    failed or the original operation will simplify away entirely (as indicated
    by ``done``).
    """

    second: UnaryOperation
    """The second operation to apply in the commuted sequence
    (`UnaryOperation`).

    When commutation is successful, this is usually ``current.operation`` or a
    modification thereof, where ``current`` is the argument to
    `UnaryOperation.commute`.  When commutation is unsuccessful, this should be
    exactly ``current.operation``.
    """

    done: bool = True
    """Whether the commutation was fully successful (`bool`).

    When `False`, the original downstream operation (``self`` in the call to
    `commute`) must still be applied after `first` (if not `None`) and
    `second`.  While `first` is usually `None` in this case, `Projection`
    operations (and possibly some extension operations) can be partially
    commuted.
    """

    messages: tuple[str, ...] = dataclasses.field(default_factory=tuple)
    """Messages that describe why commutation either failed or only
    partially succeeded.
    """

565 """