Coverage for python/lsst/daf/butler/core/ddl.py: 46%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

180 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL) in Python. 

22 

23This include "CREATE TABLE" etc. 

24 

25This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

26because we need a level of indirection between logical tables and the actual 

27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

28 

29We've opted for the rather more obscure "ddl" as the name of this module 

30instead of "schema" because the latter is too overloaded; in most SQL 

31databases, a "schema" is also another term for a namespace. 

32""" 

33from __future__ import annotations 

34 

35__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region", 

36 "AstropyTimeNsecTai", "GUID") 

37 

38from base64 import b64encode, b64decode 

39import logging 

40from math import ceil 

41from dataclasses import dataclass 

42from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union 

43import uuid 

44 

45import sqlalchemy 

46from sqlalchemy.dialects.postgresql import UUID 

47import astropy.time 

48 

49from lsst.utils.iteration import ensure_iterable 

50from lsst.sphgeom import Region 

51from .config import Config 

52from .exceptions import ValidationError 

53from . import time_utils 

54from .utils import stripIfNotNone 

55from .named import NamedValueSet 

56 

57if TYPE_CHECKING: 

58 from .timespan import TimespanDatabaseRepresentation 

59 

60 

61_LOG = logging.getLogger(__name__) 

62 

63 

class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception (``from err``) so its
                    # traceback and type are preserved for debugging instead
                    # of being discarded by the re-raise.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate

93 

94 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 produces 4 output characters for every 3 input bytes; a
        # length is only meaningful when the implementation is a sized
        # String (for Text it is left as None).
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native `bytes` to base64 `bytes`, then decode to an ASCII
        # `str`, because `str` is what SQLAlchemy expects for String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of process_bind_param: ASCII `str` (base64) back to
        # native `bytes`; None passes through unchanged.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

129 

130 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle the
        # base64 step; None passes through unchanged.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # Undo the base64 step via the base class, then rebuild the region
        # from the resulting bytes.
        raw = super().process_result_value(value, dialect)
        return None if raw is None else Region.decode(raw)

148 

149 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Convert to integer nanoseconds since epoch (TAI scale).
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # `value` is nanoseconds since epoch (TAI), or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)

177 

178 

class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; everywhere else we fall
        # back to a fixed-width CHAR holding the 32-character hex form.
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return value

        # Coerce input to UUID type; in general having UUID on input is the
        # only thing that we want, but there is code right now that uses
        # ints (and bytes / hex strings) as well.
        if isinstance(value, int):
            value = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            value = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            value = uuid.UUID(hex=value)
        elif not isinstance(value, uuid.UUID):
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        # PostgreSQL accepts the canonical string form; other backends get
        # the zero-padded 32-character hex representation.
        if dialect.name == 'postgresql':
            return str(value)
        return "%.32x" % value.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Both storage forms round-trip through the hex constructor.
        return uuid.UUID(hex=value) if value is not None else None

222 

223 

# Mapping from the type-name strings allowed in schema configuration files
# to the SQLAlchemy (or custom) column types they denote.  Used by
# FieldSpec.fromConfig to resolve the "type" config key.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}

235 

236 

@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so specs can
        # live in name-keyed containers such as NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns are implicitly NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short (<= 32 char) sized strings are retained as String; the
        # original implementation re-tested the dtype inside a branch that
        # had already established it, which is collapsed here into a single
        # expression with the same truth table.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String:
            # long strings are implemented as unsized Text instead.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        return self.dtype().python_type

374 

375 

@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Scalar config values are accepted and normalized to 1-tuples.
        source = tuple(ensure_iterable(config["source"]))
        target = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=source,
            target=target,
            onDelete=config.get("onDelete", None),
        )

426 

427 

@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE: only "columns", "unique", "foreignKeys", and "doc" are read
        # from the config here; "indexes" and "exclusion" must be supplied
        # programmatically via the constructor.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )