Coverage for python/lsst/daf/relation/_engine.py: 49%

69 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-19 09:55 +0000

1# This file is part of daf_relation. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Engine", "GenericConcreteEngine") 

25 

26import dataclasses 

27import operator 

28import uuid 

29from abc import abstractmethod 

30from collections.abc import Hashable, Sequence, Set 

31from typing import TYPE_CHECKING, Any, Generic, TypeVar 

32 

33from ._columns import ColumnTag 

34from ._exceptions import EngineError 

35 

36if TYPE_CHECKING: 

37 from ._binary_operation import BinaryOperation 

38 from ._relation import Relation 

39 from ._unary_operation import UnaryOperation 

40 

41 

42_F = TypeVar("_F") 

43 

44 

class Engine(Hashable):
    """An abstract interface for the systems that hold relation data and know
    how to process relation trees.

    Notes
    -----
    A key part of any concrete engine's interface is not defined by the base
    class, because different engines can represent the content (or "payload")
    of a relation in very different ways.

    Engines can impose their own invariants on the structure of a relation
    tree, by implementing `conform`.  They can also maintain these invariants
    when new operations are added to the tree by implementing `append_unary`
    and `append_binary`, though any derived implementations of base-class
    methods that accept relation arguments should always conform them.
    """

    @abstractmethod
    def get_relation_name(self, prefix: str = "leaf") -> str:
        """Return a name suitable for a new relation in this engine.

        Parameters
        ----------
        prefix : `str`, optional
            Prefix to include in the returned name.

        Returns
        -------
        name : `str`
            Name for the relation; guaranteed to be unique over all of the
            relations in this engine.
        """
        raise NotImplementedError()

    def get_join_identity_payload(self) -> Any:
        """Return a `~Relation.payload` for a leaf relation that is the
        `join identity <Relation.is_join_identity>`.

        Returns
        -------
        payload
            The engine-specific content for this relation.  The default
            implementation returns `None`.
        """
        return None

    def get_doomed_payload(self, columns: Set[ColumnTag]) -> Any:
        """Return a `~Relation.payload` for a leaf relation that has no rows.

        Parameters
        ----------
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            The columns the relation should have.

        Returns
        -------
        payload
            The engine-specific content for this relation.  The default
            implementation ignores ``columns`` and returns `None`.
        """
        return None

    def conform(self, relation: Relation) -> Relation:
        """Ensure a relation tree satisfies this engine's invariants.

        This can include reordering operations (in a way consistent with their
        commutators) and/or inserting `MarkerRelation` nodes.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.

        Returns
        -------
        conformed : `Relation`
            Relation tree that satisfies this engine's invariants.

        Notes
        -----
        The default implementation returns the given relation.  Engines with a
        non-trivial `conform` implementation should always call it on any
        relations they are passed, as algorithms that process the relation tree
        are not guaranteed to maintain those invariants themselves.  It is
        recommended to use a custom `MarkerRelation` to indicate trees that
        satisfy invariants, allowing the corresponding `conform` implementation
        to short-circuit quickly.
        """
        return relation

    def materialize(
        self, target: Relation, name: str | None = None, name_prefix: str = "materialization_"
    ) -> Relation:
        """Mark that a target relation's payload should be cached.

        Parameters
        ----------
        target : `Relation`
            Relation to mark.
        name : `str`, optional
            Name to use for the cached payload within the engine.
        name_prefix : `str`, optional
            Prefix to pass to `get_relation_name`; ignored if ``name``
            is provided.

        Returns
        -------
        relation : `Relation`
            New relation that marks its upstream tree for caching, unless
            the materialization was simplified away.

        Notes
        -----
        The base class implementation calls `Materialization.simplify` to avoid
        materializations of leaf relations or other materializations.  Override
        implementations should generally do the same.

        See Also
        --------
        Processor.materialize
        """
        # Imported here rather than at module scope, matching the other
        # factory methods of this class.
        from ._materialization import Materialization

        if Materialization.simplify(target):
            return target
        if name is None:
            # The generated name comes from the target's engine, not ``self``.
            name = target.engine.get_relation_name(name_prefix)
        return Materialization(target=target, name=name)

    def transfer(self, target: Relation, payload: Any | None = None) -> Relation:
        """Mark that a relation's payload should be transferred from some other
        engine to this one.

        Parameters
        ----------
        target : `Relation`
            Relation to transfer.  If ``target.engine == self``, this relation
            will be returned directly and no transfer will be performed.
            Back-to-back transfers from one engine to another and back again
            are also simplified away (via a call to `Transfer.simplify`).
            Sequences of transfers involving more than two engines are not
            simplified.
        payload : `object`, optional
            Destination-engine-specific content for the relation to attach to
            the transfer.  Most `Transfer` relations do not have a payload;
            their ability to do so is mostly to support the special relation
            trees returned by the `Processor` class.

        Returns
        -------
        relation : `Relation`
            New relation that marks its upstream tree to be transferred to a
            new engine, or the (possibly simplified) target itself if it is
            already in this engine.

        Raises
        ------
        EngineError
            Raised if ``payload`` is not `None` but the transfer is simplified
            away because the target is already in this engine.

        Notes
        -----
        The default implementation calls `conform` on the target relation using
        the target relation's engine (i.e. not ``self``).  All override
        implementations should do this as well.

        See Also
        --------
        Processor.transfer
        """
        from ._transfer import Transfer

        if simplified := Transfer.simplify(target, self):
            target = simplified
        if target.engine == self:
            # No transfer relation will exist to carry the payload.
            if payload is not None:
                raise EngineError("Cannot attach payload to transfer that will be simplified away.")
            return target
        conformed_target = target.engine.conform(target)
        return Transfer(conformed_target, destination=self, payload=payload)

    def make_doomed_relation(
        self, columns: Set[ColumnTag], messages: Sequence[str], name: str = "0"
    ) -> Relation:
        """Construct a leaf relation with no rows and one or more messages
        explaining why.

        Parameters
        ----------
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            The columns in this relation.
        messages : `~collections.abc.Sequence` [ `str` ]
            One or more messages explaining why the relation has no rows.
        name : `str`, optional
            Name used to identify and reconstruct this relation.

        Returns
        -------
        relation : `Relation`
            Doomed relation.

        Notes
        -----
        This is simply a convenience method that delegates to
        `LeafRelation.make_doomed`.  Derived engines with a nontrivial
        `conform` should override this method to conform the return value.
        """
        from ._leaf_relation import LeafRelation

        return LeafRelation.make_doomed(self, columns, messages, name)

    def make_join_identity_relation(self, name: str = "I") -> Relation:
        """Construct a leaf relation with no columns and exactly one row.

        Parameters
        ----------
        name : `str`, optional
            Name used to identify and reconstruct this relation.

        Returns
        -------
        relation : `Relation`
            Relation with no columns and one row.

        Notes
        -----
        This is a convenience method that delegates to
        `LeafRelation.make_join_identity`, passing ``self`` as the engine.
        """
        from ._leaf_relation import LeafRelation

        return LeafRelation.make_join_identity(self, name)

    def append_unary(self, operation: UnaryOperation, target: Relation) -> Relation:
        """Hook for maintaining the engine's `conform` invariants through
        `UnaryOperation.apply`.

        This method should only be called by `UnaryOperation.apply` and the
        engine's own methods and helper classes.  External code should call
        `UnaryOperation.apply` or a `Relation` factory method instead.

        Parameters
        ----------
        operation : `UnaryOperation`
            Operation to apply; should already be filtered through
            `UnaryOperation._begin_apply`.
        target : `Relation`
            Relation to apply the operation to directly.

        Returns
        -------
        relation : `Relation`
            Relation that includes the given operation acting on ``target``,
            or a simplified equivalent.

        Notes
        -----
        Implementations should delegate back to `UnaryOperation._finish_apply`
        to actually create a `UnaryOperationRelation` and perform final
        simplification and checks.  This is all the default implementation
        does.
        """  # noqa: D401
        return operation._finish_apply(target)

    def append_binary(self, operation: BinaryOperation, lhs: Relation, rhs: Relation) -> Relation:
        """Hook for maintaining the engine's `conform` invariants through
        `BinaryOperation.apply`.

        This method should only be called by `BinaryOperation.apply` and the
        engine's own methods and helper classes.  External code should call
        `BinaryOperation.apply` or a `Relation` factory method instead.

        Parameters
        ----------
        operation : `BinaryOperation`
            Operation to apply; should already be filtered through
            `BinaryOperation._begin_apply`.
        lhs : `Relation`
            One relation to apply the operation to directly.
        rhs : `Relation`
            The other relation to apply the operation to directly.

        Returns
        -------
        relation : `Relation`
            Relation that includes the given operation acting on ``lhs`` and
            ``rhs``, or a simplified equivalent.

        Notes
        -----
        Implementations should delegate back to `BinaryOperation._finish_apply`
        to actually create a `BinaryOperationRelation` and perform final
        simplification and checks.  This is all the default implementation
        does.
        """  # noqa: D401
        return operation._finish_apply(lhs, rhs)

    def backtrack_unary(
        self, operation: UnaryOperation, tree: Relation, preferred: Engine
    ) -> tuple[Relation, bool, tuple[str, ...]]:
        """Attempt to insert a unary operation in another engine upstream of
        this one via operation commutators.

        Parameters
        ----------
        operation : `UnaryOperation`
            Unary operation to apply.
        tree : `Relation`
            Relation tree the operation logically acts on; any upstream
            insertion of the given operation should be equivalent to applying
            it to the root of this tree.  Caller guarantees that ``tree.engine
            == self``.
        preferred : `Engine`
            Engine in which the operation or its commuted equivalent should be
            performed.

        Returns
        -------
        new_tree : `Relation`
            Possibly-updated relation tree.
        done : `bool`
            If `True`, the operation has been fully inserted upstream in the
            preferred engine.  If `False`, either ``tree`` was returned
            unmodified or only a part of the operation (e.g. a projection whose
            columns are a superset of the given projection's) was inserted
            upstream.
        messages : `~collections.abc.Sequence` [ `str` ]
            Messages explaining why backtracking insertion was unsuccessful or
            incomplete.  Should be sentences with no trailing ``.`` and no
            capitalization; they will be joined with semicolons.

        Notes
        -----
        The default implementation performs no backtracking: it returns
        ``tree`` unmodified, `False`, and a single explanatory message.
        """
        return tree, False, (f"engine {self} does not support backtracking insertion",)

366 

367 

@dataclasses.dataclass(repr=False, eq=False, kw_only=True)
class GenericConcreteEngine(Engine, Generic[_F]):
    """A convenience base class for concrete `Engine` implementations.

    It supplies the functionality shared by the provided `iteration` and
    `sql` engines: identity-based hashing and comparison, generated relation
    names, and lookup of named column-expression functions.  External engine
    implementations may build on it as well.
    """

    name: str
    """Display name of the engine; this is what `str` returns (`str`).
    """

    functions: dict[str, _F] = dataclasses.field(default_factory=dict)
    """Engine-specific callables, keyed by name, used to resolve
    `ColumnFunction` and `PredicateFunction` name lookups.
    """

    relation_name_counter: int = 0
    """Running count embedded in generated relation names (`int`).
    """

    def __str__(self) -> str:
        return self.name

    def __hash__(self) -> int:
        # Engines are hashed by identity, consistent with ``__eq__``.
        return id(self)

    def __eq__(self, other: Any) -> bool:
        # Two engines are equal only when they are the same object.
        return other is self

    def get_relation_name(self, prefix: str = "leaf") -> str:
        """Generate a fresh name for a new relation in this engine.

        Parameters
        ----------
        prefix : `str`, optional
            Leading component of the generated name.

        Returns
        -------
        name : `str`
            Name for the relation; guaranteed to be unique over all of the
            relations in this engine.

        Notes
        -----
        The name is built from the prefix, the current value of
        `relation_name_counter` (zero-padded to four digits), and a random
        hexadecimal suffix.  The counter is incremented on every call.
        """
        index = self.relation_name_counter
        unique_name = f"{prefix}_{index:04d}_{uuid.uuid4().hex}"
        self.relation_name_counter = index + 1
        return unique_name

    def get_function(self, name: str) -> _F | None:
        """Look up a column expression function by name.

        Parameters
        ----------
        name : `str`
            Name of the function, from `ColumnFunction.name` or
            `PredicateFunction.name`.

        Returns
        -------
        function
            Engine-specific callable, or `None` if no match was found.

        Notes
        -----
        An attribute of the built-in `operator` module with this name takes
        precedence; this covers the common case (shared by both the
        `iteration` and `sql` engines) in which operator overloading makes
        those functions appropriate.  Only when `operator` has no such
        attribute is the entry from the `functions` mapping used.
        """
        fallback = self.functions.get(name)
        return getattr(operator, name, fallback)