Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE", 

22etc.) in Python. 

23 

24This provides an extra layer on top of SQLAlchemy's classes for these concepts, 

25because we need a level of indirection between logical tables and the actual 

26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL. 

27 

28We've opted for the rather more obscure "ddl" as the name of this module 

29instead of "schema" because the latter is too overloaded; in most SQL 

30databases, a "schema" is also another term for a namespace. 

31""" 

32from __future__ import annotations 

33 

34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region") 

35 

36from base64 import b64encode, b64decode 

37from math import ceil 

38from dataclasses import dataclass 

39from typing import Optional, Tuple, Sequence, Set 

40 

41import sqlalchemy 

42 

43from lsst.sphgeom import ConvexPolygon 

44from .config import Config 

45from .exceptions import ValidationError 

46from .utils import iterable, stripIfNotNone, NamedValueSet 

47 

48 

class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught, message):
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception, and any keyword arguments the decorated
        function was called with.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword argument passed to the
            decorated function.

        Returns
        -------
        decorate : callable
            A decorator that wraps a method so that exceptions of type
            ``caught`` are re-raised as ``cls``, chained to the original
            exception.
        """
        # Local import keeps this block self-contained; functools is stdlib.
        from functools import wraps

        def decorate(func):
            # Preserve the wrapped method's name and docstring.
            @wraps(func)
            def decorated(self, config, *args, **kwds):
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Caller keywords go in first so that the reserved
                    # ``config`` and ``err`` placeholders always win;
                    # str.format ignores keywords the message does not use.
                    placeholders = dict(kwds)
                    placeholders.update(config=str(config), err=err)
                    raise cls(message.format(**placeholders)) from err
            return decorated
        return decorate

79 

80 

class Base64Bytes(sqlalchemy.TypeDecorator):
    """SQLAlchemy column type storing Python `bytes` as base64 text.

    Values are base64-encoded on the way into the database and decoded on
    the way out; the underlying column is a `sqlalchemy.String`.

    Parameters
    ----------
    nbytes : `int`
        Size of the unencoded values, in bytes.  The string column length
        is derived from this.
    *args, **kwds
        Forwarded to the base class constructor.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes, *args, **kwds):
        # base64 emits four characters for every (padded) group of three
        # input bytes.
        super().__init__(*args, length=4*ceil(nbytes/3), **kwds)
        self.nbytes = nbytes

    def process_bind_param(self, value, dialect):
        # Python -> database: native `bytes` are base64-encoded and then
        # turned into an ASCII `str`, which is what a String column expects.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value, dialect):
        # Database -> Python: the stored `str` is ASCII by construction, so
        # re-encode it to `bytes` and undo the base64 encoding.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

110 

111 

class Base64Region(Base64Bytes):
    """SQLAlchemy column type storing `sphgeom.ConvexPolygon` objects as
    base64 text.

    The region's own ``encode()`` output is passed through the
    `Base64Bytes` machinery, and ``ConvexPolygon.decode`` reverses it.
    """

    def process_bind_param(self, value, dialect):
        # NULL regions pass straight through; anything else is serialized
        # by the region itself before the base64 step.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value, dialect):
        # NULL stays NULL; otherwise undo base64, then rebuild the polygon.
        if value is not None:
            return ConvexPolygon.decode(super().process_result_value(value, dialect))
        return None

126 

127 

# Maps the ``type`` strings accepted in column configuration to the
# SQLAlchemy (or custom) column type classes they select.
VALID_CONFIG_COLUMN_TYPES = {
    # Plain SQLAlchemy types.
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    # Custom type defined in this module (sized via ``nbytes``).
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": sqlalchemy.DateTime,
    # Custom type defined in this module (sized via ``nbytes``).
    "hash": Base64Bytes,
}

138 

139 

@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.

    Equality and hashing are based on ``name`` only.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other):
        # Compare by name against anything that has one; defer to the other
        # operand (ultimately yielding False) rather than raising when
        # ``other`` has no ``name`` attribute, e.g. ``spec == None``.
        try:
            return self.name == other.name
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Must stay consistent with __eq__: only the name participates.
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        typeName = config["type"]
        dtype = VALID_CONFIG_COLUMN_TYPES.get(typeName)
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{typeName}'.")
        name = config["name"]
        if not name.islower():
            raise SchemaValidationError(f"Column name '{name}' is not all lowercase.")
        # Build from the caller-provided defaults first, then let values
        # present in the configuration override them attribute by attribute.
        self = cls(name=name, dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        # Fall back to a ``doc`` default passed via ``kwds`` (previously it
        # was discarded whenever the config had no "doc" entry).
        self.doc = stripIfNotNone(config.get("doc", self.doc))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.  When neither ``length`` nor
            ``nbytes`` is set, ``self.dtype`` itself is returned without
            being instantiated.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

236 

237 

@dataclass
class ForeignKeySpec:
    """Struct-like description of a foreign key constraint on a logical
    `Registry` table.
    """

    table: str
    """Name of the table the constraint points at."""

    source: Tuple[str, ...]
    """Column names in the source table, as a tuple."""

    target: Tuple[str, ...]
    """Column names in the target table, as a tuple."""

    onDelete: Optional[str] = None
    """SQL clause controlling what happens on deletes in the target table.

    `None` means a constraint violation exception should be raised;
    otherwise this should be either "SET NULL" or "CASCADE".
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Build a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Its ``table``,
            ``source`` and ``target`` keys are required; ``onDelete`` is
            optional.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        targetTable = config["table"]
        # ``iterable`` is applied before building each tuple, as in the
        # other column-list handling in this module.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        deleteRule = config.get("onDelete", None)
        return cls(table=targetTable, source=sourceColumns, target=targetColumns, onDelete=deleteRule)

285 

286 

287@dataclass 

288class TableSpec: 

289 """A struct-like class used to define a table or table-like 

290 query interface. 

291 """ 

292 

293 fields: NamedValueSet[FieldSpec] 

294 """Specifications for the columns in this table.""" 

295 

296 unique: Set[Tuple[str, ...]] = frozenset() 

297 """Non-primary-key unique constraints for the table.""" 

298 

299 indexes: Set[Tuple[str, ...]] = frozenset() 

300 """Indexes for the table.""" 

301 

302 foreignKeys: Sequence[ForeignKeySpec] = tuple() 

303 """Foreign key constraints for the table.""" 

304 

305 doc: Optional[str] = None 

306 """Documentation for the table.""" 

307 

308 def __post_init__(self): 

309 self.fields = NamedValueSet(self.fields) 

310 self.unique = set(self.unique) 

311 self.indexes = set(self.indexes) 

312 self.foreignKeys = list(self.foreignKeys) 

313 

314 @classmethod 

315 @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.") 

316 def fromConfig(cls, config: Config) -> TableSpec: 

317 """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`. 

318 

319 Parameters 

320 ---------- 

321 config: `Config` 

322 Configuration describing the constraint. Nested configuration keys 

323 correspond to `TableSpec` attributes. 

324 

325 Returns 

326 ------- 

327 spec: `TableSpec` 

328 Specification structure for the table. 

329 

330 Raises 

331 ------ 

332 SchemaValidationError 

333 Raised if configuration keys are missing or have invalid values. 

334 """ 

335 return cls( 

336 fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]), 

337 unique={tuple(u) for u in config.get("unique", ())}, 

338 foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())], 

339 sql=config.get("sql"), 

340 doc=stripIfNotNone(config.get("doc")), 

341 )