Coverage for python/felis/metadata.py: 17%

187 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 02:49 -0700

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import logging 

25from typing import IO, Any, Literal 

26 

27import sqlalchemy.schema as sqa_schema 

28from lsst.utils.iteration import ensure_iterable 

29from sqlalchemy import ( 

30 CheckConstraint, 

31 Column, 

32 Constraint, 

33 Engine, 

34 ForeignKeyConstraint, 

35 Index, 

36 MetaData, 

37 PrimaryKeyConstraint, 

38 ResultProxy, 

39 Table, 

40 TextClause, 

41 UniqueConstraint, 

42 create_mock_engine, 

43 make_url, 

44 text, 

45) 

46from sqlalchemy.engine.interfaces import Dialect 

47from sqlalchemy.engine.mock import MockConnection 

48from sqlalchemy.engine.url import URL 

49from sqlalchemy.exc import SQLAlchemyError 

50from sqlalchemy.types import TypeEngine 

51 

52from felis.datamodel import Schema 

53from felis.db._variants import make_variant_dict 

54 

55from . import datamodel 

56from .db import sqltypes 

57from .types import FelisType 

58 

# Module-level logger for metadata building and DDL execution messages.
logger = logging.getLogger(__name__)

60 

61 

class InsertDump:
    """An Insert Dumper for SQL statements which supports writing messages
    to stdout or a file.
    """

    def __init__(self, file: IO[str] | None = None) -> None:
        """Initialize the insert dumper.

        Parameters
        ----------
        file : `io.TextIOBase` or `None`, optional
            The file to write the SQL statements to. If None, the statements
            will be written to stdout.
        """
        self.file = file
        # Dialect used when compiling statements; set externally after the
        # mock engine is created (see ``DatabaseContext.create_mock_engine``).
        self.dialect: Dialect | None = None

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Dump the SQL statement to a file or stdout.

        Statements with parameters will be formatted with the values
        inserted into the resultant SQL output.

        Parameters
        ----------
        sql : `typing.Any`
            The SQL statement to dump.
        multiparams : `typing.Any`
            The multiparams to use for the SQL statement.
        params : `typing.Any`
            The params to use for the SQL statement.
        """
        compiled = sql.compile(dialect=self.dialect)
        statement = f"{compiled};"
        for param_set in (compiled.params,):
            # A statement without bound parameters is emitted verbatim.
            if not param_set:
                print(statement, file=self.file)
                continue
            # Quote strings, render None as SQL null, and pass everything
            # else through unchanged before interpolating into the SQL text.
            rendered = {
                key: (f"'{value}'" if isinstance(value, str) else "null" if value is None else value)
                for key, value in param_set.items()
            }
            print(statement % rendered, file=self.file)

110 

111 

def get_datatype_with_variants(column_obj: datamodel.Column) -> TypeEngine:
    """Use the Felis type system to get a SQLAlchemy datatype with variant
    overrides from the information in a `Column` object.

    Parameters
    ----------
    column_obj : `felis.datamodel.Column`
        The column object from which to get the datatype.

    Returns
    -------
    datatype : `sqlalchemy.types.TypeEngine`
        The SQLAlchemy datatype, including any dialect variant overrides.

    Raises
    ------
    ValueError
        If the column has a sized type but no length.
    """
    variants = make_variant_dict(column_obj)
    felis_type = FelisType.felis_type(column_obj.datatype.value)
    # The felis type name doubles as the factory function name in sqltypes.
    type_factory = getattr(sqltypes, column_obj.datatype.value)
    if not felis_type.is_sized:
        return type_factory(**variants)
    if not column_obj.length:
        raise ValueError(f"Column {column_obj.name} has sized type '{column_obj.datatype}' but no length")
    return type_factory(column_obj.length, **variants)

136 

137 

# Server-default expressions that are passed through to the database verbatim
# (wrapped in ``text()``) instead of being quoted as string literals; used by
# ``MetaDataBuilder.build_column`` when processing a column's default value.
_VALID_SERVER_DEFAULTS = ("CURRENT_TIMESTAMP", "NOW()", "LOCALTIMESTAMP", "NULL")

139 

140 

class MetaDataBuilder:
    """A class for building a `MetaData` object from a Felis `Schema`."""

    def __init__(
        self, schema: Schema, apply_schema_to_metadata: bool = True, apply_schema_to_tables: bool = True
    ) -> None:
        """Initialize the metadata builder.

        Parameters
        ----------
        schema : `felis.datamodel.Schema`
            The schema object from which to build the SQLAlchemy metadata.
        apply_schema_to_metadata : `bool`, optional
            Whether to apply the schema name to the metadata object.
        apply_schema_to_tables : `bool`, optional
            Whether to apply the schema name to the tables.
        """
        self.schema = schema
        if not apply_schema_to_metadata:
            logger.debug("Schema name will not be applied to metadata")
        if not apply_schema_to_tables:
            logger.debug("Schema name will not be applied to tables")
        self.metadata = MetaData(schema=schema.name if apply_schema_to_metadata else None)
        # Registry mapping Felis object IDs to the SQLAlchemy objects built
        # from them, so later build phases (primary keys, constraints,
        # indexes) can resolve cross-references by ID.
        self._objects: dict[str, Any] = {}
        self.apply_schema_to_tables = apply_schema_to_tables

    def build(self) -> MetaData:
        """Build the SQLAlchemy tables and constraints from the schema."""
        # Tables must be built first so constraints can look up columns by ID.
        self.build_tables()
        self.build_constraints()
        return self.metadata

    def build_tables(self) -> None:
        """Build the SQLAlchemy tables from the schema.

        Notes
        -----
        This function builds all the tables by calling ``build_table`` on
        each Pydantic object. It also calls ``build_primary_key`` to create the
        primary key constraints.
        """
        for table in self.schema.tables:
            self.build_table(table)
            # Tables without an explicit primary key definition are left
            # without a PrimaryKeyConstraint.
            if table.primary_key:
                primary_key = self.build_primary_key(table.primary_key)
                self._objects[table.id].append_constraint(primary_key)

    def build_primary_key(self, primary_key_columns: str | list[str]) -> PrimaryKeyConstraint:
        """Build a SQLAlchemy `PrimaryKeyConstraint` from a single column ID
        or a list.

        The `primary_key_columns` are strings or a list of strings representing
        IDs pointing to columns that will be looked up in the internal object
        dictionary.

        Parameters
        ----------
        primary_key_columns : `str` or `list` of `str`
            The column ID or list of column IDs from which to build the primary
            key.

        Returns
        -------
        primary_key: `sqlalchemy.PrimaryKeyConstraint`
            The SQLAlchemy primary key constraint object.
        """
        # ensure_iterable lets a single column ID be treated like a list.
        return PrimaryKeyConstraint(
            *[self._objects[column_id] for column_id in ensure_iterable(primary_key_columns)]
        )

    def build_table(self, table_obj: datamodel.Table) -> None:
        """Build a `sqlalchemy.Table` from a `felis.datamodel.Table` and add
        it to the `sqlalchemy.MetaData` object.

        Several MySQL table options are handled by annotations on the table,
        including the engine and charset. This is not needed for Postgres,
        which does not have equivalent options.

        Parameters
        ----------
        table_obj : `felis.datamodel.Table`
            The table object to build the SQLAlchemy table from.
        """
        # Process mysql table options.
        optargs = {}
        if table_obj.mysql_engine:
            optargs["mysql_engine"] = table_obj.mysql_engine
        if table_obj.mysql_charset:
            optargs["mysql_charset"] = table_obj.mysql_charset

        # Create the SQLAlchemy table object and its columns.
        name = table_obj.name
        id = table_obj.id
        description = table_obj.description
        columns = [self.build_column(column) for column in table_obj.columns]
        table = Table(
            name,
            self.metadata,
            *columns,
            comment=description,
            schema=self.schema.name if self.apply_schema_to_tables else None,
            **optargs,  # type: ignore[arg-type]
        )

        # Create the indexes and add them to the table.
        indexes = [self.build_index(index) for index in table_obj.indexes]
        for index in indexes:
            # NOTE(review): ``_set_parent`` is a private SQLAlchemy API,
            # presumably used here to attach the index to the table without
            # triggering duplicate registration — confirm on SQLAlchemy
            # upgrades.
            index._set_parent(table)
            table.indexes.add(index)

        self._objects[id] = table

    def build_column(self, column_obj: datamodel.Column) -> Column:
        """Build a SQLAlchemy column from a `felis.datamodel.Column` object.

        Parameters
        ----------
        column_obj : `felis.datamodel.Column`
            The column object from which to build the SQLAlchemy column.

        Returns
        -------
        column: `sqlalchemy.Column`
            The SQLAlchemy column object.
        """
        # Get basic column attributes.
        name = column_obj.name
        id = column_obj.id
        description = column_obj.description
        value = column_obj.value
        nullable = column_obj.nullable

        # Get datatype, handling variant overrides such as "mysql:datatype".
        datatype = get_datatype_with_variants(column_obj)

        # Set autoincrement, depending on if it was provided explicitly.
        autoincrement: Literal["auto"] | bool = (
            column_obj.autoincrement if column_obj.autoincrement is not None else "auto"
        )

        # Recognized SQL keywords (see _VALID_SERVER_DEFAULTS) and non-string
        # values are wrapped in text() so they are emitted verbatim; any other
        # string stays plain so SQLAlchemy quotes it as a literal default.
        server_default: str | TextClause | None = None
        if value is not None:
            server_default = str(value)
            if server_default in _VALID_SERVER_DEFAULTS or not isinstance(value, str):
                # If the server default is a valid keyword or not a string,
                # use it as is.
                server_default = text(server_default)

        if server_default is not None:
            logger.debug(f"Column '{id}' has default value: {server_default}")

        column: Column = Column(
            name,
            datatype,
            comment=description,
            autoincrement=autoincrement,
            nullable=nullable,
            server_default=server_default,
        )

        self._objects[id] = column

        return column

    def build_constraints(self) -> None:
        """Build the SQLAlchemy constraints in the Felis schema and append them
        to the associated `Table`.

        Notes
        -----
        This is performed as a separate step after building the tables so that
        all the referenced objects in the constraints will be present and can
        be looked up by their ID.
        """
        for table_obj in self.schema.tables:
            table = self._objects[table_obj.id]
            for constraint_obj in table_obj.constraints:
                constraint = self.build_constraint(constraint_obj)
                table.append_constraint(constraint)

    def build_constraint(self, constraint_obj: datamodel.Constraint) -> Constraint:
        """Build a SQLAlchemy `Constraint` from a `felis.datamodel.Constraint`
        object.

        Parameters
        ----------
        constraint_obj : `felis.datamodel.Constraint`
            The constraint object from which to build the SQLAlchemy
            constraint.

        Returns
        -------
        constraint: `sqlalchemy.Constraint`
            The SQLAlchemy constraint object.

        Raises
        ------
        ValueError
            If the constraint type is not recognized.
        """
        # Keyword arguments shared by all constraint types.
        # NOTE(review): ``x or None`` coerces every falsy value to None, so an
        # explicit ``deferrable=False`` would be dropped as "unspecified" —
        # confirm this is intended.
        args: dict[str, Any] = {
            "name": constraint_obj.name or None,
            "info": constraint_obj.description or None,
            "deferrable": constraint_obj.deferrable or None,
            "initially": constraint_obj.initially or None,
        }
        constraint: Constraint
        constraint_type = constraint_obj.type

        # Dispatch on the concrete Pydantic constraint type; column IDs are
        # resolved through the internal object registry.
        if isinstance(constraint_obj, datamodel.ForeignKeyConstraint):
            fk_obj: datamodel.ForeignKeyConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in fk_obj.columns]
            refcolumns = [self._objects[column_id] for column_id in fk_obj.referenced_columns]
            constraint = ForeignKeyConstraint(columns, refcolumns, **args)
        elif isinstance(constraint_obj, datamodel.CheckConstraint):
            check_obj: datamodel.CheckConstraint = constraint_obj
            expression = check_obj.expression
            constraint = CheckConstraint(expression, **args)
        elif isinstance(constraint_obj, datamodel.UniqueConstraint):
            uniq_obj: datamodel.UniqueConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in uniq_obj.columns]
            constraint = UniqueConstraint(*columns, **args)
        else:
            raise ValueError(f"Unknown constraint type: {constraint_type}")

        self._objects[constraint_obj.id] = constraint

        return constraint

    def build_index(self, index_obj: datamodel.Index) -> Index:
        """Build a SQLAlchemy `Index` from a `felis.datamodel.Index` object.

        Parameters
        ----------
        index_obj : `felis.datamodel.Index`
            The index object from which to build the SQLAlchemy index.

        Returns
        -------
        index: `sqlalchemy.Index`
            The SQLAlchemy index object.
        """
        # An index may be defined over column IDs, raw SQL expressions, or
        # both; either list may be absent on the Pydantic object.
        columns = [self._objects[c_id] for c_id in (index_obj.columns if index_obj.columns else [])]
        expressions = index_obj.expressions if index_obj.expressions else []
        index = Index(index_obj.name, *columns, *expressions)
        self._objects[index_obj.id] = index
        return index

390 

391 

class ConnectionWrapper:
    """A wrapper for a SQLAlchemy engine or mock connection which provides a
    consistent interface for executing SQL statements.
    """

    def __init__(self, engine: Engine | MockConnection):
        """Initialize the connection wrapper.

        Parameters
        ----------
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection to wrap.
        """
        self.engine = engine

    def execute(self, statement: Any) -> ResultProxy:
        """Execute a SQL statement on the engine and return the result."""
        # Plain strings are promoted to executable text clauses.
        stmt = text(statement) if isinstance(statement, str) else statement
        # Mock connections have no transactional context to manage.
        if isinstance(self.engine, MockConnection):
            return self.engine.connect().execute(stmt)
        # Real engines execute inside a transaction that commits on success.
        with self.engine.begin() as connection:
            return connection.execute(stmt)

417 

418 

class DatabaseContext:
    """A class for managing the schema and its database connection."""

    def __init__(self, metadata: MetaData, engine: Engine | MockConnection):
        """Initialize the database context.

        Parameters
        ----------
        metadata : `sqlalchemy.MetaData`
            The SQLAlchemy metadata object.

        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection object.
        """
        self.engine = engine
        self.metadata = metadata
        self.connection = ConnectionWrapper(engine)

    def create_if_not_exists(self) -> None:
        """Create the schema in the database if it does not exist.

        In MySQL, this will create a new database. In PostgreSQL, it will
        create a new schema. For other variants, this is an unsupported
        operation.

        Raises
        ------
        ValueError
            If the database dialect is neither MySQL nor PostgreSQL.
        """
        dialect_name = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if dialect_name == "mysql":
                logger.info(f"Creating MySQL database: {schema_name}")
                # NOTE(review): schema name is interpolated directly into the
                # DDL string; assumed to come from trusted configuration.
                self.connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {schema_name}"))
            elif dialect_name == "postgresql":
                logger.info(f"Creating PG schema: {schema_name}")
                self.connection.execute(sqa_schema.CreateSchema(schema_name, if_not_exists=True))
            else:
                raise ValueError("Unsupported database type:" + dialect_name)
        except SQLAlchemyError as e:
            logger.error(f"Error creating schema: {e}")
            raise

    def drop_if_exists(self) -> None:
        """Drop the schema in the database if it exists.

        In MySQL, this will drop a database. In PostgreSQL, it will drop a
        schema. For other variants, this is unsupported for now.

        Raises
        ------
        ValueError
            If the database dialect is neither MySQL nor PostgreSQL.
        """
        dialect_name = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if dialect_name == "mysql":
                logger.info(f"Dropping MySQL database if exists: {schema_name}")
                self.connection.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
            elif dialect_name == "postgresql":
                logger.info(f"Dropping PostgreSQL schema if exists: {schema_name}")
                self.connection.execute(sqa_schema.DropSchema(schema_name, if_exists=True, cascade=True))
            else:
                raise ValueError(f"Unsupported database type: {dialect_name}")
        except SQLAlchemyError as e:
            logger.error(f"Error dropping schema: {e}")
            raise

    def create_all(self) -> None:
        """Create all tables in the schema using the metadata object."""
        self.metadata.create_all(self.engine)

    @staticmethod
    def create_mock_engine(engine_url: URL, output_file: IO[str] | None = None) -> MockConnection:
        """Create a mock engine for testing or dumping DDL statements.

        Parameters
        ----------
        engine_url : `sqlalchemy.engine.url.URL`
            The SQLAlchemy engine URL.
        output_file : `typing.IO` [ `str` ] or `None`, optional
            The file to write the SQL statements to. If None, the statements
            will be written to stdout.

        Returns
        -------
        engine : `sqlalchemy.MockConnection`
            A mock connection whose executor dumps DDL via `InsertDump`.
        """
        dumper = InsertDump(output_file)
        mock_engine = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        # The dumper needs the engine's dialect to compile statements.
        dumper.dialect = mock_engine.dialect
        return mock_engine