Coverage for python/lsst/daf/butler/core/_topology.py: 62%

133 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-02 02:01 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names of this module; this tuple controls what a star-import exports.
__all__ = (
    "SpatialRegionDatabaseRepresentation",
    "TopologicalSpace",
    "TopologicalFamily",
    "TopologicalRelationshipEndpoint",
    "TopologicalExtentDatabaseRepresentation",
)

31 

32import enum 

33from abc import ABC, abstractmethod 

34from typing import Any, ClassVar, Dict, Generic, Iterator, Mapping, Optional, Tuple, Type, TypeVar 

35 

36import lsst.sphgeom 

37import sqlalchemy 

38from lsst.utils.classes import immutable 

39 

40from . import ddl 

41from .named import NamedValueAbstractSet 

42 

43 

@enum.unique
class TopologicalSpace(enum.Enum):
    """Enumeration of continuous-variable relationships for dimensions.

    Most relationships between dimensions are discrete: they are ordinary
    foreign key relationships between tables.  A relationship tied to a
    `TopologicalSpace` is different - each table row occupies a region in
    some continuous-variable space, and rows are related to one another by
    topological operators (such as "overlaps") acting on those regions.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky; regions are represented in memory by
    `lsst.sphgeom.Region` objects.
    """

    TEMPORAL = enum.auto()
    """Time; intervals are represented in memory by `Timespan` instances
    (with TAI endpoints).
    """

65 

66 

@immutable
class TopologicalFamily(ABC):
    """A grouping of `TopologicalRelationshipEndpoint` objects.

    These regions form a hierarchy in which one endpoint's rows always contain
    another's in a predefined way.

    This hierarchy means that endpoints in the same family do not generally
    have to be related using (e.g.) overlaps; instead, the regions from one
    "best" endpoint from each family are related to the best endpoint from
    each other family in a query.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    def __init__(
        self,
        name: str,
        space: TopologicalSpace,
    ):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Families are equal when both their space and their name match.
        # Returning NotImplemented (rather than False) for foreign types lets
        # Python try the other operand's reflected comparison before falling
        # back to its default, which still evaluates to `False` for ``==``.
        if not isinstance(other, TopologicalFamily):
            return NotImplemented
        return self.space == other.space and self.name == other.name

    def __hash__(self) -> int:
        # Hashing only the name is consistent with ``__eq__``: equal families
        # always share a name, so they always share a hash.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        """Test whether an endpoint belongs to this family.

        Parameters
        ----------
        other : `TopologicalRelationshipEndpoint`
            Endpoint to test.

        Returns
        -------
        contained : `bool`
            `True` if the family that ``other.topology`` holds for this
            family's space compares equal to ``self``; `False` otherwise
            (including when ``other`` has no family for that space).
        """
        return other.topology.get(self.space) == self

    @abstractmethod
    def choose(
        self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
    ) -> TopologicalRelationshipEndpoint:
        """Select the best member of this family to use.

        These are to be used in a query join or data ID when more than one
        is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Endpoints to choose from.  May include endpoints that are not
            members of this family (which should be ignored).

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """

138 

139 

@immutable
class TopologicalRelationshipEndpoint(ABC):
    """Representation of a logical table that can participate in overlap joins.

    This is an abstract base class.  Each instance stands for a logical
    table that may take part in the overlap joins defined by a
    `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return unique string identifier for this endpoint (`str`)."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """Return the relationship families to which this endpoint belongs.

        The mapping is keyed by the `TopologicalSpace` of each family.
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.SPATIAL` family.

        `None` is returned when `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.TEMPORAL` family.

        `None` is returned when `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.TEMPORAL)

172 

173 

# Type variable for the concrete representation class that
# `TopologicalExtentDatabaseRepresentation.fromSelectable` returns.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
# Type variable for the in-memory region type that a representation maps to
# the database.
_R = TypeVar("_R")


# NOTE(review): this class derives only from `Generic`, not `abc.ABC`, so the
# `@abstractmethod` markers below are presumably not enforced when a subclass
# is instantiated - confirm whether that is intended.
class TopologicalExtentDatabaseRepresentation(Generic[_R]):
    """Mapping of in-memory representation of a region to DB representation.

    This is an abstract base class.  Its subclasses define how the in-memory
    representation of a `TopologicalSpace` region is mapped to a
    database-storage representation, while its instances behave like a
    SQLAlchemy-based column expression.
    """

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    When the representation is backed by several columns, this is only a part
    of each of those columns' names.  Queries (and tables that represent
    materialized queries) may pass a different name (via the ``name``
    parameters of various methods) to disambiguate between the regions
    associated with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space where regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to table.

        Creates the one or more `ddl.FieldSpec` objects describing the fields
        a table needs in order to hold this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s)
            in the database are implementation-defined.  Nullable region
            fields default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor for all fields; implementations only provide the
            ``name``, ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; the length of the tuple depends on
            the subclass, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the
            field specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls, extent: Optional[_R], name: Optional[str] = None, result: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Add a region to a dictionary.

        The dictionary represents a database row in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.
            Exactly the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary.

        The dictionary represents a database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region in
            this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        This base implementation reports no support.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion`
            that includes the fields of this representation is allowed.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S], selectable: sqlalchemy.sql.FromClause, name: Optional[str] = None
    ) -> _S:
        """Construct representation of a column in the table or subquery.

        Builds an instance that stands for a logical column (which may in
        fact be backed by multiple columns) of the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the topological extent (`str`).

        For a representation backed by a single actual column this should be
        that column's full name.  Otherwise it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests whether region is ``NULL``.

        The result is a SQLAlchemy expression that tests whether this region
        is logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str]) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str` or `None`
            If not `None`, a name for the logical column that should be used
            to label the columns.  If `None`, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()

386 

387 

class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """Class reflecting how spatial regions are represented inside the DB.

    An instance of this class encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class may
    become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        # NOTE(review): ``nullable`` and ``**kwargs`` are not used here, even
        # though the base-class documentation says keyword arguments are
        # forwarded to `ddl.FieldSpec`.  Forwarding them would change the
        # generated schema, so this is left as-is - confirm whether it is
        # intentional.
        if name is None:
            name = cls.NAME
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return (name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region object itself is stored under the single field name.
        result[name] = extent
        return result

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return mapping[name]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        # The base-class contract says a provided name is used to label the
        # column, while omitting it keeps the column's native name.
        if name is None:
            yield self.column
        else:
            yield self.column.label(name)