# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
etc.) in Python.

This provides an extra layer on top of SQLAlchemy's classes for these concepts,
because we need a level of indirection between logical tables and the actual
SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.

We've opted for the rather more obscure "ddl" as the name of this module
instead of "schema" because the latter is too overloaded; in most SQL
databases, a "schema" is also another term for a namespace.
"""
from __future__ import annotations

__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
           "AstropyTimeNsecTai")

from base64 import b64encode, b64decode
import logging
from math import ceil
from dataclasses import dataclass
from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union

import sqlalchemy
import astropy.time

from lsst.sphgeom import ConvexPolygon
from .config import Config
from .exceptions import ValidationError
from . import time_utils
from .utils import iterable, stripIfNotNone
from .named import NamedValueSet


_LOG = logging.getLogger(__name__)


class SchemaValidationError(ValidationError):
    """Exception used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument. This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config`` and ``err``.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    raise cls(message.format(config=str(config), err=err))
            return decorated
        return decorate
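
# Illustrative use of ``SchemaValidationError.translate`` (a hedged sketch, not
# part of the original module; ``Widget`` and its config keys are made up):
#
#     @classmethod
#     @SchemaValidationError.translate(KeyError, "Missing key {err} in config '{config}'.")
#     def fromConfig(cls, config: Config) -> Widget:
#         return cls(name=config["name"])  # a KeyError here is re-raised as a
#                                          # SchemaValidationError carrying the
#                                          # formatted message above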



class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `bytes` to a base64-encoded
    `sqlalchemy.String`.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        length = 4*ceil(nbytes/3)
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # 'value' is native `bytes`. We want to encode that to base64 `bytes`
        # and then ASCII `str`, because `str` is what SQLAlchemy expects for
        # String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # 'value' is a `str` that must be ASCII because it's base64-encoded.
        # We want to transform that to base64-encoded `bytes` and then
        # native `bytes`.
        return b64decode(value.encode("ascii")) if value is not None else None
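
# Worked example of the sizing rule in ``Base64Bytes.__init__`` above (an
# illustrative comment, not part of the original module): base64 emits 4
# output characters for every 3 input bytes, so ``4*ceil(nbytes/3)`` is the
# maximum encoded length.
#
#     >>> from math import ceil
#     >>> 4*ceil(32/3)    # e.g. a 32-byte SHA-256 digest
#     44
#     >>> len(b64encode(bytes(32)).decode("ascii"))
#     44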



class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that maps Python `sphgeom.ConvexPolygon` to a
    base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[ConvexPolygon], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[ConvexPolygon]:
        if value is None:
            return None
        return ConvexPolygon.decode(super().process_result_value(value, dialect))
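
# Round-trip sketch for the region type above (illustrative comment; assumes a
# ``ConvexPolygon`` instance ``region`` built elsewhere with ``lsst.sphgeom``):
#
#     >>> encoded = region.encode()               # bytes, stored via base64
#     >>> restored = ConvexPolygon.decode(encoded)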



class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `astropy.time.Time` to a
    number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        value = time_utils.astropy_to_nsec(value)
        return value

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # value is nanoseconds since epoch, or None
        if value is None:
            return None
        value = time_utils.nsec_to_astropy(value)
        return value
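
# Illustrative round trip for the time type above (a hedged sketch; it relies
# only on the ``time_utils`` helpers already imported in this module):
#
#     >>> t = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
#     >>> nsec = time_utils.astropy_to_nsec(t)   # int, stored as BigInteger
#     >>> t2 = time_utils.nsec_to_astropy(nsec)  # same instant, ns precision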



VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}
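
# The keys above are the strings accepted for the ``type`` entry of a column
# configuration. Illustrative sketch (the column name is made up; the real
# schema configuration lives elsewhere in daf_butler):
#
#     column_config = {"name": "dataset_id", "type": "int", "primaryKey": True}
#     VALID_CONFIG_COLUMN_TYPES[column_config["type"]]   # -> sqlalchemy.BigInteger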



@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> Union[bool, NotImplemented]:
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the column. Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in ``config``.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self
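
    # Illustrative use of ``FieldSpec.fromConfig`` (a hedged sketch; ``Config``
    # accepts nested mappings, and the column shown here is made up):
    #
    #     config = Config({"name": "collection_id", "type": "int",
    #                      "primaryKey": True, "autoincrement": True})
    #     spec = FieldSpec.fromConfig(config)
    #     assert spec.nullable is False   # primary keys default to NOT NULL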


    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype
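
# Sizing sketch for ``getSizedColumnType`` (illustrative, not part of the
# original module):
#
#     FieldSpec("doc", dtype=sqlalchemy.String, length=64).getSizedColumnType()
#         # -> sqlalchemy.String(length=64)
#     FieldSpec("hash", dtype=Base64Bytes, nbytes=32).getSizedColumnType()
#         # -> Base64Bytes(nbytes=32), i.e. a 44-character base64 String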



@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes of rows in the target table.

    `None` (the default) indicates that a constraint violation exception should
    be raised instead; otherwise this should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the constraint. Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec : `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        return cls(table=config["table"],
                   source=tuple(iterable(config["source"])),
                   target=tuple(iterable(config["target"])),
                   onDelete=config.get("onDelete", None))
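
# Illustrative foreignKey configuration consumed by ``fromConfig`` above (a
# hedged sketch; the table and column names are made up):
#
#     spec = ForeignKeySpec.fromConfig(Config({
#         "table": "collection",
#         "source": ["collection_id"],
#         "target": ["id"],
#         "onDelete": "CASCADE",
#     }))
#     # spec.source == ("collection_id",); deleting the target row cascades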



@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes : `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the table. Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec : `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )
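
# Illustrative table configuration consumed by ``TableSpec.fromConfig`` above
# (a hedged sketch; the table layout and names are made up, and note that the
# ``indexes`` key is not read by ``fromConfig``):
#
#     spec = TableSpec.fromConfig(Config({
#         "columns": [
#             {"name": "id", "type": "int", "primaryKey": True},
#             {"name": "name", "type": "string", "length": 64},
#         ],
#         "unique": [["name"]],
#         "foreignKeys": [
#             {"table": "other_table", "source": ["id"], "target": ["id"]},
#         ],
#         "doc": "An example table.",
#     }))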