Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL) in Python. 

22 

23This include "CREATE TABLE" etc. 

24 

25This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

26because we need a level of indirection between logical tables and the actual 

27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

28 

29We've opted for the rather more obscure "ddl" as the name of this module 

30instead of "schema" because the latter is too overloaded; in most SQL 

31databases, a "schema" is also another term for a namespace. 

32""" 

33from __future__ import annotations 

34 

35__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region", 

36 "AstropyTimeNsecTai", "GUID") 

37 

38from base64 import b64encode, b64decode 

39import logging 

40from math import ceil 

41from dataclasses import dataclass 

42from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union 

43import uuid 

44 

45import sqlalchemy 

46from sqlalchemy.dialects.postgresql import UUID 

47import astropy.time 

48 

49from lsst.sphgeom import Region 

50from .config import Config 

51from .exceptions import ValidationError 

52from . import time_utils 

53from .utils import iterable, stripIfNotNone 

54from .named import NamedValueSet 

55 

56if TYPE_CHECKING:

57 from .timespan import TimespanDatabaseRepresentation

58 

59 

# Module-level logger; all diagnostics from this module go through it.
_LOG = logging.getLogger(__name__)

61 

62 

class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain the original exception so tracebacks show the
                    # underlying cause instead of discarding it.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate

92 

93 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 bytes into 4 characters; an explicit length
        # only matters if a subclass switches ``impl`` to a sized String.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native ``bytes`` as base64 and decode the result to an
        # ASCII ``str``, which is what SQLAlchemy expects for string fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of ``process_bind_param``: ASCII ``str`` -> base64
        # ``bytes`` -> original native ``bytes``.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

126 

127 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then delegate base64 encoding to
        # the base class.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        raw = super().process_result_value(value, dialect)
        return None if raw is None else Region.decode(raw)

145 

146 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Store as an integer count of TAI nanoseconds since the Unix epoch.
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # ``value`` is nanoseconds since epoch, or None for SQL NULL.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)

172 

173 

class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # Native UUID column on PostgreSQL; 32-character hex CHAR elsewhere.
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return None

        # Coerce the input to uuid.UUID; UUID instances are what we normally
        # want, but some existing code still passes ints, bytes, or hex
        # strings.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == 'postgresql':
            return str(value)
        # 32 zero-padded hex digits, matching the CHAR(32) storage format.
        return f"{value.int:032x}"

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        return None if value is None else uuid.UUID(hex=value)

215 

216 

# Mapping from the ``type`` strings accepted in schema configuration files
# to the SQLAlchemy (or custom, defined above) column types they produce.
# Used by `FieldSpec.fromConfig` to resolve a column's dtype.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}

228 

229 

@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Fields are identified by name alone, so containers of them (e.g.
        # NamedValueSet) treat two specs for the same column as equal.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        # Must be consistent with name-only __eq__ above.
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key fields default to NOT NULL unless explicitly overridden.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short explicit-length strings are kept as sqlalchemy.String;
        # everything else is implemented as sqlalchemy.Text.  (The original
        # nested a second, redundant dtype check here.)  Note that a falsy
        # length (None or 0) deliberately fails the test.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        return self.dtype().python_type

367 

368 

@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Construct a `ForeignKeySpec` from a `SchemaConfig` subset.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Column lists may be given as a single name or a sequence of names;
        # ``iterable`` normalizes both forms before we freeze them as tuples.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"], source=sourceColumns,
                   target=targetColumns, onDelete=config.get("onDelete"))

419 

420 

@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        # Normalize all inputs to concrete containers so later mutation and
        # membership tests behave predictably regardless of what iterables
        # the caller passed.
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only "columns", "unique", "foreignKeys", and "doc"
        # are read here; "indexes" and "exclusion" cannot be specified via
        # config — TODO confirm whether that is intentional.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )