Coverage for python/lsst/daf/butler/core/_topology.py: 61%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

129 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names exported by a star-import of this module.
__all__ = (
    "SpatialRegionDatabaseRepresentation",
    "TopologicalSpace",
    "TopologicalFamily",
    "TopologicalRelationshipEndpoint",
    "TopologicalExtentDatabaseRepresentation",
)

31 

32from abc import ABC, abstractmethod 

33import enum 

34from typing import ( 

35 Any, 

36 ClassVar, 

37 Dict, 

38 Generic, 

39 Iterator, 

40 Mapping, 

41 Optional, 

42 Tuple, 

43 Type, 

44 TypeVar, 

45) 

46 

47import sqlalchemy 

48 

49from lsst.utils.classes import immutable 

50import lsst.sphgeom 

51from . import ddl 

52from .named import NamedValueAbstractSet 

53 

54 

@enum.unique
class TopologicalSpace(enum.Enum):
    """Continuous-variable spaces in which dimensions can be related.

    Dimension relationships are usually discrete: ordinary foreign-key
    relationships between tables.  A relationship tied to a
    `TopologicalSpace` is different.  There, each table row occupies a
    region of a continuous-variable space, and rows are related to each
    other by topological operators (such as "overlaps") applied to those
    regions.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky; regions are held in memory as
    `lsst.sphgeom.Region` objects.
    """

    TEMPORAL = enum.auto()
    """Time; intervals are held in memory as `Timespan` instances with TAI
    endpoints.
    """

76 

77 

@immutable
class TopologicalFamily(ABC):
    """A grouping of `TopologicalRelationshipEndpoint` objects.

    The regions of the endpoints in a family form a hierarchy in which one
    endpoint's rows always contain another's in a predefined way.

    This hierarchy means that endpoints in the same family do not generally
    have to be related using (e.g.) overlaps; instead, the regions from one
    "best" endpoint from each family are related to the best endpoint from
    each other family in a query.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    def __init__(
        self,
        name: str,
        space: TopologicalSpace,
    ):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Two families are equal when both their space and their name match.
        if isinstance(other, TopologicalFamily):
            return self.space == other.space and self.name == other.name
        # Let Python try the reflected comparison on ``other`` before it
        # falls back to identity; for unrelated types the final result is
        # still `False`.
        return NotImplemented

    def __hash__(self) -> int:
        # Hashing the name alone is consistent with __eq__, because equal
        # families always have equal names.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        # An endpoint is a member of this family if the family it reports
        # for this family's space compares equal to ``self``.
        return other.topology.get(self.space) == self

    @abstractmethod
    def choose(self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
               ) -> TopologicalRelationshipEndpoint:
        """Select the best member of this family to use.

        These are to be used in a query join or data ID when more than one
        is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Endpoints to choose from.  May include endpoints that are not
            members of this family (which should be ignored).

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """

148 

149 

@immutable
class TopologicalRelationshipEndpoint(ABC):
    """Abstract description of a logical table usable in overlap joins.

    Each instance stands for one logical table that can take part in the
    overlap joins defined by a `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique string identifier for this endpoint (`str`)."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """Relationship families this endpoint is a member of.

        The mapping is keyed by the `TopologicalSpace` of each family.
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.SPATIAL` family, or `None` if
        `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.TEMPORAL` family, or `None` if
        `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.TEMPORAL)

182 

183 

# Type variable bound to `TopologicalExtentDatabaseRepresentation`; used to
# annotate `fromSelectable` as returning an instance of the class it was
# called on.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
# Type variable for the in-memory region type that a representation class is
# parameterized on.
_R = TypeVar("_R")

186 

187 

class TopologicalExtentDatabaseRepresentation(Generic[_R]):
    """Mapping of in-memory representation of a region to DB representation.

    An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.
    """

    # NOTE(review): this class derives only from `Generic`, not `ABC`, so the
    # `abstractmethod` markers below are not enforced at instantiation time.
    # Subclasses are still expected to override every method marked abstract.

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name`` parameters
    to various methods) in order to disambiguate between the regions associated
    with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space where regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
                       ) -> Tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to table.

        Makes one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s)
            in the database are implementation-defined.  Nullable region
            fields default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(cls, extent: Optional[_R], name: Optional[str] = None,
               result: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Add a region to a dictionary.

        The dictionary represents a database row in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary.

        The dictionary represents a database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region in
            this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        The base-class implementation returns `False`.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(cls: Type[_S], selectable: sqlalchemy.sql.FromClause,
                       name: Optional[str] = None) -> _S:
        """Construct representation of a column in the table or subquery.

        Constructs an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests whether the region is ``NULL``.

        Returns a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()

393 

394 

class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """Class reflecting how spatial regions are represented inside the DB.

    An instance of this class encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class may
    become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
                       ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        #
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted but not
        # passed on to `ddl.FieldSpec` here.  Forwarding them would alter the
        # generated schema, so that behavior is deliberately left unchanged.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # A region is always stored in exactly one column.
        return (name,)

    @classmethod
    def update(cls, extent: Optional[lsst.sphgeom.Region], name: Optional[str] = None,
               result: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region object itself is stored under the column name; no
        # conversion is done here.
        result[name] = extent
        return result

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None
                ) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return mapping[name]

    @classmethod
    def fromSelectable(cls: Type[SpatialRegionDatabaseRepresentation],
                       selectable: sqlalchemy.sql.FromClause,
                       name: Optional[str] = None) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        if name is None:
            # No label requested: expose the column under its native name.
            yield self.column
        else:
            # Honor the base-class contract: a provided name is used to label
            # the column that backs this logical column.
            yield self.column.label(name)