Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE", 

22etc.) in Python. 

23 

24This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

25because we need a level of indirection between logical tables and the actual 

26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

27 

28We've opted for the rather more obscure "ddl" as the name of this module 

29instead of "schema" because the latter is too overloaded; in most SQL 

30databases, a "schema" is also another term for a namespace. 

31""" 

32from __future__ import annotations 

33 

# Public names exported by a star-import of this module.
__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
           "AstropyTimeNsecTai")

36 

37from base64 import b64encode, b64decode 

38import logging 

39from math import ceil 

40from dataclasses import dataclass 

41from typing import Optional, Tuple, Sequence, Set 

42 

43import sqlalchemy 

44import astropy.time 

45 

46from lsst.sphgeom import ConvexPolygon 

47from .config import Config 

48from .exceptions import ValidationError 

49from . import time_utils 

50from .utils import iterable, stripIfNotNone, NamedValueSet 

51 

52 

# Module-level logger, named after this module via ``__name__``.
_LOG = logging.getLogger(__name__)

54 

55 

class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught, message):
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword argument that was passed to
            the decorated function.

        Returns
        -------
        decorate : callable
            A decorator that wraps a method so that exceptions of type
            ``caught`` are re-raised as instances of this class, with the
            original exception attached as ``__cause__``.
        """
        # Imported here so the block does not rely on a module-level import.
        from functools import wraps

        def decorate(func):
            # Preserve the wrapped method's name, docstring and signature.
            @wraps(func)
            def decorated(self, config, *args, **kwds):
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Make caller-supplied keyword arguments available to the
                    # message template; ``config`` and ``err`` always win.
                    # Placeholders that are not used by the template are
                    # simply ignored by ``str.format``.
                    fields = dict(kwds)
                    fields.update(config=str(config), err=err)
                    raise cls(message.format(**fields)) from err
            return decorated
        return decorate

86 

87 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """SQLAlchemy column type that stores Python `bytes` as base64 text in a
    `sqlalchemy.String` column.

    Parameters
    ----------
    nbytes : `int`
        Size of the raw (un-encoded) value; the length of the underlying
        string column is derived from it.
    *args, **kwds
        Forwarded unchanged to the base-class constructor.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes, *args, **kwds):
        # Four encoded characters are reserved for every (possibly partial)
        # group of three raw bytes.
        encoded_length = ceil(nbytes/3)*4
        super().__init__(*args, length=encoded_length, **kwds)
        self.nbytes = nbytes

    def process_bind_param(self, value, dialect):
        """Convert a `bytes` value (or `None`) into the ASCII `str` that is
        handed to the String column.

        Raises
        ------
        TypeError
            Raised if ``value`` is neither `None` nor a `bytes` instance.
        """
        if value is None:
            return None
        if isinstance(value, bytes):
            # bytes -> base64 bytes -> ASCII str.
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value, dialect):
        """Convert the stored base64 `str` (or `None`) back into `bytes`.
        """
        if value is None:
            return None
        # ASCII str -> base64 bytes -> native bytes.
        return b64decode(value.encode("ascii"))

117 

118 

class Base64Region(Base64Bytes):
    """SQLAlchemy column type that stores a `sphgeom.ConvexPolygon` as a
    base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value, dialect):
        """Serialize the region with its ``encode`` method and delegate the
        base64 step to `Base64Bytes`; `None` passes through unchanged.
        """
        if value is not None:
            return super().process_bind_param(value.encode(), dialect)
        return None

    def process_result_value(self, value, dialect):
        """Base64-decode the stored string via `Base64Bytes` and rebuild the
        region with `ConvexPolygon.decode`; `None` passes through unchanged.
        """
        if value is not None:
            raw = super().process_result_value(value, dialect)
            return ConvexPolygon.decode(raw)
        return None

133 

134 

class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """SQLAlchemy column type that stores `astropy.time.Time` values as an
    integer number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value, dialect):
        """Convert an `astropy.time.Time` (or `None`) to the integer handed
        to the database.

        Raises
        ------
        TypeError
            Raised if ``value`` is neither `None` nor an `astropy.time.Time`.
        """
        if value is None:
            return None
        if isinstance(value, astropy.time.Time):
            return time_utils.astropy_to_nsec(value)
        raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")

    def process_result_value(self, value, dialect):
        """Convert the stored nanosecond count (or `None`) back to an
        `astropy.time.Time`.
        """
        return None if value is None else time_utils.nsec_to_astropy(value)

157 

158 

# Maps the ``type`` strings accepted in column configuration to the column
# type classes used for them; `FieldSpec.fromConfig` looks types up here and
# rejects any string that is not a key of this mapping.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}

169 

170 

@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.

    Two `FieldSpec` instances compare equal, and hash identically, when
    their names match; no other attribute takes part in the comparison.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other):
        # Only another FieldSpec can be compared by name.  For anything else
        # let Python try the reflected comparison instead of failing with an
        # AttributeError on a missing ``name`` attribute.
        if not isinstance(other, FieldSpec):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Must stay consistent with __eq__, which looks at the name only.
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        typeName = config["type"]
        dtype = VALID_CONFIG_COLUMN_TYPES.get(typeName)
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{typeName}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        # Start from the keyword defaults, then let config values override.
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        # Fall back to a ``doc`` default supplied through ``kwds`` rather than
        # discarding it when the config has no "doc" entry.
        self.doc = stripIfNotNone(config.get("doc", self.doc))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object: ``self.dtype`` instantiated with
            ``length`` or ``nbytes`` when one of them is set, otherwise
            ``self.dtype`` itself, not instantiated.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

267 

268 

@dataclass
class ForeignKeySpec:
    """Struct-like description of a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the table the constraint points at."""

    source: Tuple[str, ...]
    """Column names in the source table, as a tuple."""

    target: Tuple[str, ...]
    """Column names in the target table, as a tuple."""

    onDelete: Optional[str] = None
    """SQL clause describing what happens on deletes in the target table.

    `None` indicates that a constraint violation exception should be raised;
    otherwise this should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Build a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  The "table", "source"
            and "target" keys are required; "onDelete" is optional.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        targetTable = config["table"]
        # ``iterable`` is applied first so that the result can always be
        # turned into a tuple of column names.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        deleteRule = config.get("onDelete", None)
        return cls(table=targetTable, source=sourceColumns, target=targetColumns,
                   onDelete=deleteRule)

321 

322 

@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.
    """

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]] = frozenset()
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]] = frozenset()
    """Indexes for the table."""

    foreignKeys: Sequence[ForeignKeySpec] = tuple()
    """Foreign key constraints for the table."""

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    def __post_init__(self):
        # Copy every collection into a fresh mutable container, so that
        # instances never share the immutable class-level defaults or the
        # caller's own objects.
        self.fields = NamedValueSet(self.fields)
        self.unique = set(self.unique)
        self.indexes = set(self.indexes)
        self.foreignKeys = list(self.foreignKeys)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  The "columns" key is
            required; "unique", "foreignKeys" and "doc" are optional.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only pass keywords that are actual `TableSpec` fields: the
        # dataclass-generated ``__init__`` rejects unknown names (a stray
        # ``sql=`` argument here made every call raise `TypeError`).
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )