Coverage for python/lsst/daf/butler/core/ddl.py: 50%

234 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-23 11:08 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL) in Python. 

22 

23This include "CREATE TABLE" etc. 

24 

25This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

26because we need a level of indirection between logical tables and the actual 

27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

28 

29We've opted for the rather more obscure "ddl" as the name of this module 

30instead of "schema" because the latter is too overloaded; in most SQL 

31databases, a "schema" is also another term for a namespace. 

32""" 

33from __future__ import annotations 

34 

35from lsst import sphgeom 

36 

37__all__ = ( 

38 "TableSpec", 

39 "FieldSpec", 

40 "ForeignKeySpec", 

41 "IndexSpec", 

42 "Base64Bytes", 

43 "Base64Region", 

44 "AstropyTimeNsecTai", 

45 "GUID", 

46) 

47 

48import logging 

49import uuid 

50from base64 import b64decode, b64encode 

51from dataclasses import dataclass 

52from math import ceil 

53from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union 

54 

55import astropy.time 

56import sqlalchemy 

57from lsst.sphgeom import Region 

58from lsst.utils.iteration import ensure_iterable 

59from sqlalchemy.dialects.postgresql import UUID 

60 

61from . import time_utils 

62from .config import Config 

63from .exceptions import ValidationError 

64from .named import NamedValueSet 

65from .utils import stripIfNotNone 

66 

# Import used only in type annotations; guarded so it is never imported at
# runtime (avoids a circular import with .timespan).
if TYPE_CHECKING:
    from .timespan import TimespanDatabaseRepresentation

69 

70 

# Module-level logger for this module.
_LOG = logging.getLogger(__name__)

72 

73 

class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain explicitly with ``from err`` so the original
                    # exception is preserved as the cause (not merely as
                    # implicit context) and the translation reads as
                    # deliberate in tracebacks.
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate

106 

107 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int | None = None, *args: Any, **kwargs: Any):
        # A stored length only makes sense when a subclass swaps ``impl`` to
        # sqlalchemy.String; base64 expands 3 raw bytes into 4 characters.
        length: int | None = None
        if nbytes is not None and self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        """Encode native `bytes` as base64 and return it as an ASCII `str`,
        which is what SQLAlchemy expects for String/Text fields.
        """
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        """Decode the stored base64 `str` (necessarily ASCII) back into
        native `bytes`.
        """
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes

149 

150 

# Create an alias, for use below to disambiguate between this module's
# Base64Bytes and the built-in sqlalchemy types.
LocalBase64Bytes = Base64Bytes

154 

155 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    cache_ok = True  # have to be set explicitly in each class

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        """Serialize the region to bytes and delegate the base64 encoding to
        the parent class.
        """
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        """Base64-decode the stored value via the parent class, then
        reconstruct the region from the resulting bytes.
        """
        raw = super().process_result_value(value, dialect) if value is not None else None
        if raw is None:
            return None
        return Region.decode(raw)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region

181 

182 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        """Convert an astropy time to an integer nanosecond count (TAI)."""
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        """Convert nanoseconds since epoch (or `None`) back to astropy time."""
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)

212 

213 

# TODO: sqlalchemy 2 has internal support for UUID:
# https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.types.TypeEngine:
        """Pick a native UUID column on PostgreSQL, CHAR(32) elsewhere."""
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        """Coerce the bind value to a `uuid.UUID` and render it for storage."""
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.Dialect
    ) -> Optional[uuid.UUID]:
        """Convert the stored value back to `uuid.UUID` (or pass through)."""
        # sqlalchemy 2 converts to UUID internally, so the value may already
        # be the right type.
        if value is None or isinstance(value, uuid.UUID):
            return value
        return uuid.UUID(hex=value)

264 

265 

# Mapping from the ``type`` strings accepted in schema configuration to the
# SQLAlchemy (or custom, defined above) column types they produce.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}

277 

278 

@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL. If ``primaryKey`` is
    `True`, during construction this value will be forced to `False`."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __post_init__(self) -> None:
        if self.primaryKey:
            # Change the default to match primaryKey.
            self.nullable = False

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) considers only the column name, so
        # containers of FieldSpec behave like mappings keyed on name.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; providing
        # both is ambiguous.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    @classmethod
    def for_region(cls, name: str = "region", nullable: bool = True, nbytes: int = 2048) -> FieldSpec:
        """Create a `FieldSpec` for a spatial region column.

        Parameters
        ----------
        name : `str`, optional
            Name for the field.
        nullable : `bool`, optional
            Whether NULL values are permitted.
        nbytes : `int`, optional
            Maximum number of bytes for serialized regions.  The actual column
            size will be larger to allow for base-64 encoding.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for a region column.
        """
        return cls(name, nullable=nullable, dtype=Base64Region, nbytes=nbytes)

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Simplified from a nested form that tested dtype == String twice:
        # only short (<= 32 chars) explicitly-sized strings stay String;
        # anything else is implemented as Text (see getSizedColumnType).
        return self.dtype == sqlalchemy.String and bool(self.length) and self.length <= 32

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine | type:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore

449 

450 

@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Config may hold a single column name or a sequence of them on each
        # side; normalize both to tuples before constructing the spec.
        source_columns = tuple(ensure_iterable(config["source"]))
        target_columns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=source_columns,
            target=target_columns,
            onDelete=config.get("onDelete", None),
        )

503 

504 

@dataclass(frozen=True)
class IndexSpec:
    """Specification of an index on table columns.

    Parameters
    ----------
    *columns : `str`
        Names of the columns to index.
    **kwargs: `Any`
        Additional keyword arguments to pass directly to
        `sqlalchemy.schema.Index` constructor.  This could be used to provide
        backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL
        one can pass ``postgresql_using="gist"``.
    """

    columns: Tuple[str, ...]
    """Column names to include in the index (`Tuple` [ `str` ])."""

    kwargs: dict[str, Any]
    """Additional keyword arguments passed directly to
    `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
    """

    def __init__(self, *columns: str, **kwargs: Any):
        # The dataclass is frozen, so the generated __setattr__ raises;
        # bypass it with object.__setattr__ to populate the fields.
        object.__setattr__(self, "columns", tuple(columns))
        object.__setattr__(self, "kwargs", kwargs)

    def __hash__(self) -> int:
        # Hash on the column tuple only: kwargs is a dict and unhashable,
        # and the generated frozen-dataclass hash would include it.
        return hash(self.columns)

534 

535 

@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `IndexSpec` ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # Although this is a dataclass, construction goes through this explicit
    # __init__ so that arbitrary iterables can be normalized into the
    # concrete container types declared below.
    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[IndexSpec] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[IndexSpec]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only columns, unique, foreignKeys, and doc are read
        # from config here; indexes/exclusion are constructor-only.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )