Coverage for python/lsst/daf/butler/core/ddl.py: 51%

216 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-23 02:26 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL) in Python. 

22 

This includes "CREATE TABLE" etc.

24 

25This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

26because we need a level of indirection between logical tables and the actual 

27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

28 

29We've opted for the rather more obscure "ddl" as the name of this module 

30instead of "schema" because the latter is too overloaded; in most SQL 

31databases, a "schema" is also another term for a namespace. 

32""" 

33from __future__ import annotations 

34 

35from lsst import sphgeom 

36 

# Names exported as the public API of this module.
__all__ = (
    "TableSpec",
    "FieldSpec",
    "ForeignKeySpec",
    "Base64Bytes",
    "Base64Region",
    "AstropyTimeNsecTai",
    "GUID",
)

46 

47import logging 

48import uuid 

49from base64 import b64decode, b64encode 

50from dataclasses import dataclass 

51from math import ceil 

52from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union 

53 

54import astropy.time 

55import sqlalchemy 

56from lsst.sphgeom import Region 

57from lsst.utils.iteration import ensure_iterable 

58from sqlalchemy.dialects.postgresql import UUID 

59 

60from . import time_utils 

61from .config import Config 

62from .exceptions import ValidationError 

63from .named import NamedValueSet 

64from .utils import stripIfNotNone 

65 

# Only needed for type annotations (which are lazy via the __future__ import
# above), so the import is skipped at runtime.
if TYPE_CHECKING:
    from .timespan import TimespanDatabaseRepresentation


# Module-level logger, following the standard per-module convention.
_LOG = logging.getLogger(__name__)

71 

72 

class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception so its traceback and
                    # context are preserved for debugging (previously it was
                    # implicitly attached as __context__ only).
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate

105 

106 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 inflates the payload: every 3 raw bytes become 4 text
        # characters.  Only a sized String implementation needs a length;
        # Text (the default impl) takes None.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        # Encode native `bytes` to base64 `bytes`, then to an ASCII `str`,
        # since `str` is what SQLAlchemy expects for String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        # Reverse of process_bind_param: the stored `str` is ASCII (it is
        # base64), so re-encode it and base64-decode back to native bytes.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes

145 

146 

# Create an alias, used below (see FieldSpec.getPythonType) to disambiguate
# between this custom type and the built-in SQLAlchemy types.
LocalBase64Bytes = Base64Bytes

150 

151 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    cache_ok = True  # have to be set explicitly in each class

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle the
        # base64/text conversion; None passes straight through.
        if value is not None:
            return super().process_bind_param(value.encode(), dialect)
        return None

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        # Base class returns the raw serialized bytes (or None); decode them
        # back into a Region object.
        if value is None:
            return None
        raw = super().process_result_value(value, dialect)
        return Region.decode(raw)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region

177 

178 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        # Convert an astropy time to an integer nanosecond count for storage;
        # reject anything that is not astropy.time.Time (or None).
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        # value is nanoseconds since epoch, or None
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)

208 

209 

class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; every other backend falls
        # back to a fixed-width hex string.
        if dialect.name != "postgresql":
            return dialect.type_descriptor(sqlalchemy.CHAR(32))
        return dialect.type_descriptor(UUID())

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return value

        # Coerce input to UUID type; in general having UUID on input is the
        # only thing that we want, but there is code right now that uses ints.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        # Native UUID columns take the canonical string form; the CHAR(32)
        # fallback stores the bare 32-digit hex representation.
        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Both storage forms are parseable as a hex string.
        if value is None:
            return value
        return uuid.UUID(hex=value)

253 

254 

# Mapping from the type-name strings accepted in schema configuration files
# to the SQLAlchemy (or custom, defined above) column types they produce;
# used by FieldSpec.fromConfig to resolve the "type" key.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}

266 

267 

@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL. If ``primaryKey`` is
    `True`, during construction this value will be forced to `False`."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __post_init__(self) -> None:
        if self.primaryKey:
            # Change the default to match primaryKey.
            self.nullable = False

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) are by column name only; this lets
        # FieldSpec instances be stored in a NamedValueSet keyed by name.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; providing
        # both is ambiguous.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short strings with an explicit length are retained as String;
        # everything else (including long or unsized strings) is not.
        # (Simplified from the original, which redundantly re-tested the
        # dtype inside a branch already guarded by the same condition.)
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore

417 

418 

@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # "source" and "target" may be given as a single name or a sequence;
        # normalize both to tuples before constructing the spec.
        sourceColumns = tuple(ensure_iterable(config["source"]))
        targetColumns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )

471 

472 

@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE: dataclass does not replace a user-defined __init__, so this
    # explicit constructor (which normalizes the iterables into concrete
    # containers) takes precedence over the generated one.
    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only "columns", "unique", "foreignKeys", and "doc"
        # are read from config here; "indexes"/"exclusion" keys, if present,
        # are ignored — confirm against the schema config format.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )