Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE", 

22etc.) in Python. 

23 

24This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

25because we need a level of indirection between logical tables and the actual 

26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

27 

28We've opted for the rather more obscure "ddl" as the name of this module 

29instead of "schema" because the latter is too overloaded; in most SQL 

30databases, a "schema" is also another term for a namespace. 

31""" 

32from __future__ import annotations 

33 

34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region", 

35 "AstropyTimeNsecTai") 

36 

37from base64 import b64encode, b64decode 

38import logging 

39from math import ceil 

40from dataclasses import dataclass 

41from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union 

42 

43import sqlalchemy 

44import astropy.time 

45 

46from lsst.sphgeom import ConvexPolygon 

47from .config import Config 

48from .exceptions import ValidationError 

49from . import time_utils 

50from .utils import iterable, stripIfNotNone 

51from .named import NamedValueSet 

52 

53if TYPE_CHECKING: 53 ↛ 54line 53 didn't jump to line 54, because the condition on line 53 was never true

54 from .timespan import DatabaseTimespanRepresentation 

55 

56 

# Logger for this module, named after the module itself.
_LOG = logging.getLogger(__name__)

58 

59 

class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain explicitly ("from err") so the traceback shows
                    # the original exception as the direct cause instead of
                    # only as implicit context.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate

90 

91 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `bytes` to a base64-encoded
    `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 inflates data by a factor of 4/3 (rounded up to a multiple
        # of 4); a length is only meaningful when the implementation type is
        # a sized String rather than Text.
        if self.impl == sqlalchemy.String:
            length = 4*ceil(nbytes/3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native `bytes` to base64 and then to an ASCII `str`, since
        # SQLAlchemy expects `str` for String/Text columns.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of process_bind_param: the stored `str` is ASCII because it
        # is base64-encoded; decode it back to native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

123 

124 

class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that maps Python `sphgeom.ConvexPolygon` to a
    base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[ConvexPolygon], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle the
        # base64 encoding; None passes through unchanged.
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[ConvexPolygon]:
        # Base class returns the raw serialized bytes (or None); rebuild the
        # polygon from them.
        raw = super().process_result_value(value, dialect)
        return None if raw is None else ConvexPolygon.decode(raw)

141 

142 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `astropy.time.Time` to a
    number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        # Convert an astropy Time to integer nanoseconds; anything else
        # (other than None) is rejected.
        if value is None:
            return None
        if isinstance(value, astropy.time.Time):
            return time_utils.astropy_to_nsec(value)
        raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # Stored value is nanoseconds since epoch, or None.
        return None if value is None else time_utils.nsec_to_astropy(value)

166 

167 

# Mapping from configuration "type" strings to the SQLAlchemy (or custom)
# column types they denote; used by FieldSpec.fromConfig.
VALID_CONFIG_COLUMN_TYPES = dict(
    string=sqlalchemy.String,
    int=sqlalchemy.BigInteger,
    float=sqlalchemy.Float,
    region=Base64Region,
    bool=sqlalchemy.Boolean,
    blob=sqlalchemy.LargeBinary,
    datetime=AstropyTimeNsecTai,
    hash=Base64Bytes,
)

178 

179 

@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Fields compare (and hash) by name only, so sets of FieldSpec
        # behave like sets of column names.
        # Note: annotated ``-> bool`` per convention; ``NotImplemented`` is a
        # value, not a type, so it cannot appear in the annotation.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; supplying
        # both is ambiguous.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short (<= 32 characters) declared-String fields stay String;
        # longer or unsized ones are implemented as Text instead.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

308 

309 

@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Scalar column names are accepted as well as sequences; ``iterable``
        # normalizes both to something we can make a tuple from.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )

362 

363 

@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `DatabaseTimespanRepresentation` subclass representing a timespan
        column.
    recycleIds : bool, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : str, optional
        Documentation for the table.
    """
    # NOTE: @dataclass does not replace this explicitly-defined __init__;
    # the constructor normalizes each argument to a concrete container type.
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[DatabaseTimespanRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[DatabaseTimespanRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only "columns", "unique", "foreignKeys", and "doc" are read here;
        # indexes and exclusion constraints are not configurable from config.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )