Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE", 

22etc.) in Python. 

23 

24This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

25because we need a level of indirection between logical tables and the actual 

26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

27 

28We've opted for the rather more obscure "ddl" as the name of this module 

29instead of "schema" because the latter is too overloaded; in most SQL 

30databases, a "schema" is also another term for a namespace. 

31""" 

32from __future__ import annotations 

33 

34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region", 

35 "AstropyTimeNsecTai") 

36 

37from base64 import b64encode, b64decode 

38import logging 

39from math import ceil 

40from dataclasses import dataclass 

41from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union 

42 

43import sqlalchemy 

44import astropy.time 

45 

46from lsst.sphgeom import ConvexPolygon 

47from .config import Config 

48from .exceptions import ValidationError 

49from . import time_utils 

50from .utils import iterable, stripIfNotNone 

51from .named import NamedValueSet 

52 

if TYPE_CHECKING:
    # Imported for type annotations only; deferring this to type-checking
    # time avoids importing .timespan at runtime (e.g. to prevent an import
    # cycle -- TODO confirm against .timespan's imports).
    from .timespan import DatabaseTimespanRepresentation


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

58 

59 

class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.

        Returns
        -------
        decorator : `Callable`
            A decorator that wraps a method, catching ``caught`` exceptions
            and re-raising them as `SchemaValidationError`.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain the original exception ("from err") so the full
                    # cause traceback is preserved for debugging instead of
                    # being reported merely as context of the new exception.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate

90 

91 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type storing Python `bytes` as a base64-encoded
    `sqlalchemy.String` column.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns each (possibly partial) 3-byte group into 4 output
        # characters, so the backing string column needs 4*ceil(nbytes/3).
        encoded_length = 4*ceil(nbytes/3)
        super().__init__(*args, length=encoded_length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Going to the database: native bytes -> base64 bytes -> ASCII str,
        # since SQLAlchemy expects `str` for String columns.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Coming from the database: reverse of process_bind_param.  The
        # stored string is base64, hence guaranteed ASCII.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

123 

124 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type storing a `sphgeom.ConvexPolygon` as a
    base64-encoded `sqlalchemy.String` column.
    """

    def process_bind_param(self, value: Optional[ConvexPolygon], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to raw bytes, then let the base class handle
        # the base64 encoding.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[ConvexPolygon]:
        if value is None:
            return None
        # Base class yields the raw serialized bytes; reconstruct the region.
        raw = super().process_result_value(value, dialect)
        return ConvexPolygon.decode(raw)

141 

142 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type storing `astropy.time.Time` as a count of
    nanoseconds since the Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        # Only astropy Time objects can be converted; reject anything else
        # up front rather than letting the conversion fail obscurely.
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # `value` is TAI nanoseconds since the Unix epoch, or None.
        if value is None:
            return None
        return time_utils.nsec_to_astropy(value)

166 

167 

# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types implementing them.
# Used by FieldSpec.fromConfig to validate and resolve the "type" key.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}

178 

179 

@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so
        # containers of FieldSpec cannot hold two columns of the same name.
        # Note: returning NotImplemented for non-FieldSpec operands is the
        # standard rich-comparison convention; annotating the return as
        # `bool` is the accepted typing idiom (`NotImplemented` is a value,
        # not a type, and is not valid inside Union[...]).
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; accepting
        # both would be ambiguous.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary key columns default to NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        # No sizing information: return the type class itself, which
        # SQLAlchemy accepts in place of an instance.
        return self.dtype

288 

289 

@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # The source/target config values may be a scalar or a sequence;
        # iterable() normalizes both cases before we freeze them as tuples.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"], source=sourceColumns,
                   target=targetColumns, onDelete=config.get("onDelete", None))

342 

343 

@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `DatabaseTimespanRepresentation` subclass representing a timespan
        column.
    recycleIds : bool, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : str, optional
        Documentation for the table.
    """
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[DatabaseTimespanRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        # Normalize all iterables into concrete containers so attribute
        # types match the annotations below regardless of what was passed.
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[DatabaseTimespanRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only "columns", "unique", "foreignKeys", and "doc"
        # are read from config; "indexes" and "exclusion" are not supported
        # via configuration here.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )