Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

from __future__ import annotations

__all__ = (
    "SpatialRegionDatabaseRepresentation",
    "TopologicalSpace",
    "TopologicalFamily",
    "TopologicalRelationshipEndpoint",
    "TopologicalExtentDatabaseRepresentation",
)

from abc import ABC, abstractmethod
import enum
from typing import (
    Any,
    ClassVar,
    Dict,
    Generic,
    Iterator,
    Mapping,
    Optional,
    Tuple,
    Type,
    TypeVar,
)

import sqlalchemy

import lsst.sphgeom
from . import ddl
from .named import NamedValueAbstractSet
from .utils import immutable

53 

54 

@enum.unique
class TopologicalSpace(enum.Enum):
    """Enumeration of the different categories of continuous-variable
    relationships supported by the dimensions system.

    Most dimension relationships are discrete, in that they are regular
    foreign key relationships between tables.  Those connected to a
    `TopologicalSpace` are not - a row in a table instead occupies some
    region in a continuous-variable space, and topological operators like
    "overlaps" between regions in that space define the relationships between
    rows.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky, using `lsst.sphgeom.Region` objects to represent
    those regions in memory.
    """

    TEMPORAL = enum.auto()
    """Time, using `Timespan` instances (with TAI endpoints) to represent
    intervals in memory.
    """

77 

78 

@immutable
class TopologicalFamily(ABC):
    """A grouping of `TopologicalRelationshipEndpoint` objects whose regions
    form a hierarchy in which one endpoint's rows always contain another's in
    a predefined way.

    This hierarchy means that endpoints in the same family do not generally
    have to be related using (e.g.) overlaps; instead, the regions from one
    "best" endpoint from each family are related to the best endpoint from
    each other family in a query.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    def __init__(
        self,
        name: str,
        space: TopologicalSpace,
    ):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Two families are equal only when both their space and their name
        # match; anything that is not a `TopologicalFamily` compares unequal.
        if isinstance(other, TopologicalFamily):
            return self.space == other.space and self.name == other.name
        return False

    def __hash__(self) -> int:
        # Hash on the name alone.  Equal families necessarily share a name,
        # so this stays consistent with `__eq__`.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        # An endpoint belongs to this family when the family it reports for
        # this family's space compares equal to ``self``.  ``get`` returns
        # `None` for endpoints with no family in that space, which never
        # compares equal.
        return other.topology.get(self.space) == self

    @abstractmethod
    def choose(
        self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
    ) -> TopologicalRelationshipEndpoint:
        """Select the best member of this family to use in a query join or
        data ID when more than one is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Endpoints to choose from.  May include endpoints that are not
            members of this family (which should be ignored).

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """

145 

146 

@immutable
class TopologicalRelationshipEndpoint(ABC):
    """An abstract base class whose instances represent a logical table that
    may participate in overlap joins defined by a `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique string identifier for this endpoint (`str`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """The relationship families to which this endpoint belongs, keyed
        by the space for that family
        (`Mapping` [`TopologicalSpace`, `TopologicalFamily`]).
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.SPATIAL` family, or `None` if
        `topology` has no entry for that space.
        """
        return self.topology.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.TEMPORAL` family, or `None` if
        `topology` has no entry for that space.
        """
        return self.topology.get(TopologicalSpace.TEMPORAL)

178 return self.topology.get(TopologicalSpace.TEMPORAL) 

179 

180 

# Type of a concrete representation class; lets `fromSelectable` be annotated
# as returning an instance of whichever subclass it is called on.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")

# In-memory (Python) type of the region/extent a representation maps to the
# database.
_R = TypeVar("_R")


class TopologicalExtentDatabaseRepresentation(Generic[_R]):
    """An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.

    Notes
    -----
    This class derives only from `typing.Generic`, not `abc.ABC`, so the
    `abc.abstractmethod` markers below are declarative: Python does not
    refuse to instantiate a subclass that leaves some of them unimplemented.
    The base implementations all raise `NotImplementedError`.
    """

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name``
    parameters to various methods) in order to disambiguate between the
    regions associated with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space in which the regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s)
            in the database are implementation-defined.  Nullable region
            fields default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor for all fields; implementations only provide the
            ``name``, ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the
            field specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls,
        extent: Optional[_R],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Add a region to a dictionary that represents a database row
        in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary that represents a
        database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region, or `None` if it is
            logically ``NULL``.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion
        constraints.

        The base implementation returns `False`; subclasses that support
        exclusion constraints override it.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion`
            that includes the fields of this representation is allowed.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S], selectable: sqlalchemy.sql.FromClause, name: Optional[str] = None
    ) -> _S:
        """Construct an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or
        subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset
        of the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column or columns that comprise this logical
        column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names
            will be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()

381 

382 

class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """An object that encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class
    may become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted but not
        # used when building the field spec below; confirm whether they
        # should be forwarded to `ddl.FieldSpec`.  Left as-is here because
        # forwarding them could change generated schemas.
        #
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # A region is always stored in exactly one column.
        return (name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region object (or `None`) is stored directly under the column
        # name; no conversion is done here.
        result[name] = extent
        return result

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # Raises `KeyError` if the row has no entry for this column.
        return mapping[name]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        if name is None:
            # No label requested: expose the column under its native name.
            yield self.column
        else:
            # Honor the requested logical name by labeling the single
            # backing column with it.
            yield self.column.label(name)