Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL) in Python. 

22 

This includes "CREATE TABLE", etc.

24 

25This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

26because we need a level of indirection between logical tables and the actual 

27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

28 

29We've opted for the rather more obscure "ddl" as the name of this module 

30instead of "schema" because the latter is too overloaded; in most SQL 

31databases, a "schema" is also another term for a namespace. 

32""" 

33from __future__ import annotations 

34 

35__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region", 

36 "AstropyTimeNsecTai") 

37 

38from base64 import b64encode, b64decode 

39import logging 

40from math import ceil 

41from dataclasses import dataclass 

42from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union 

43 

44import sqlalchemy 

45import astropy.time 

46 

47from lsst.sphgeom import Region 

48from .config import Config 

49from .exceptions import ValidationError 

50from . import time_utils 

51from .utils import iterable, stripIfNotNone 

52from .named import NamedValueSet 

53 

if TYPE_CHECKING:

55 from .timespan import TimespanDatabaseRepresentation 

56 

57 

58_LOG = logging.getLogger(__name__) 

59 

60 

class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain the original exception ("from err") so the full
                    # underlying traceback is preserved for debugging; the
                    # original code dropped it.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate

90 

91 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 input bytes into 4 output characters; an
        # explicit column length is only meaningful for a String impl
        # (Text columns are unbounded).
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native `bytes` to base64 `bytes`, then to an ASCII `str`,
        # because `str` is what SQLAlchemy expects for String/Text fields.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of process_bind_param: the stored `str` is ASCII (it is
        # base64-encoded), so decode it back to native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

124 

125 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then reuse the base64 machinery
        # from the parent class.
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # The parent class maps None -> None, so only decode real payloads.
        decoded = super().process_result_value(value, dialect)
        return None if decoded is None else Region.decode(decoded)

143 

144 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Convert the astropy time to an integer nanosecond count (TAI).
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # `value` is nanoseconds since epoch, or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)

170 

171 

# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom TypeDecorator) column types they translate to.
# Used by FieldSpec.fromConfig to validate and resolve column definitions.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}

182 

183 

@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so specs can
        # be looked up by name in a NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless explicitly overridden.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Bug fix: the original implementation fell off the end and returned
        # None (not False) for non-String dtypes, violating the declared
        # ``-> bool`` return type; it also checked the dtype twice.  Only
        # short strings (length <= 32) are kept as sqlalchemy.String; longer
        # or unbounded ones become sqlalchemy.Text (see getSizedColumnType).
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # A String spec that is too long to stay a String is implemented
            # as unbounded Text instead.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # Instantiate the SQLAlchemy type to ask it for its Python type.
        return self.dtype().python_type

321 

322 

@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # "source" and "target" may each be a single name or a sequence of
        # names; normalize both to tuples via `iterable`.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )

373 

374 

@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        # Normalize all constraint containers to concrete collections so
        # callers may pass any iterable.
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): "indexes" and "exclusion" are not read from config
        # here — presumably they are only set programmatically; confirm
        # before adding them.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )