Coverage for python/felis/metadata.py: 17%

181 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 09:50 +0000

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import logging 

25from typing import IO, Any, Literal 

26 

27import sqlalchemy.schema as sqa_schema 

28from lsst.utils.iteration import ensure_iterable 

29from sqlalchemy import ( 

30 CheckConstraint, 

31 Column, 

32 Constraint, 

33 Engine, 

34 ForeignKeyConstraint, 

35 Index, 

36 MetaData, 

37 Numeric, 

38 PrimaryKeyConstraint, 

39 ResultProxy, 

40 Table, 

41 UniqueConstraint, 

42 create_mock_engine, 

43 make_url, 

44 text, 

45) 

46from sqlalchemy.engine.interfaces import Dialect 

47from sqlalchemy.engine.mock import MockConnection 

48from sqlalchemy.engine.url import URL 

49from sqlalchemy.exc import SQLAlchemyError 

50from sqlalchemy.types import TypeEngine 

51 

52from felis.datamodel import Schema 

53from felis.db._variants import make_variant_dict 

54 

55from . import datamodel 

56from .db import sqltypes 

57from .types import FelisType 

58 

59logger = logging.getLogger(__name__) 

60 

61 

class InsertDump:
    """An Insert Dumper for SQL statements which supports writing messages
    to stdout or a file.
    """

    def __init__(self, file: IO[str] | None = None) -> None:
        """Initialize the insert dumper.

        Parameters
        ----------
        file : `io.TextIOBase` or `None`, optional
            The file to write the SQL statements to. If None, the statements
            will be written to stdout.
        """
        self.file = file
        # Set externally (e.g. after ``create_mock_engine``) so statements
        # compile for the target database dialect.
        self.dialect: Dialect | None = None

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Dump the SQL statement to a file or stdout.

        Statements with parameters will be formatted with the values
        inserted into the resultant SQL output.

        Parameters
        ----------
        sql : `typing.Any`
            The SQL statement to dump.
        multiparams : `typing.Any`
            The multiparams to use for the SQL statement.
        params : `typing.Any`
            The params to use for the SQL statement.
        """
        compiled = sql.compile(dialect=self.dialect)
        sql_str = str(compiled) + ";"
        # Use a distinct loop name so the ``params`` keyword argument is not
        # shadowed (the original looped ``for params in params_list``).
        param_set = compiled.params
        if not param_set:
            # No bound parameters; emit the statement unchanged.
            print(sql_str, file=self.file)
            return
        # Render bound values as SQL literals: quote strings, map None to
        # "null", and pass every other value through unchanged for the
        # ``%``-style substitution below.
        new_params = {
            key: f"'{value}'" if isinstance(value, str) else "null" if value is None else value
            for key, value in param_set.items()
        }
        print(sql_str % new_params, file=self.file)

110 

111 

def get_datatype_with_variants(column_obj: datamodel.Column) -> TypeEngine:
    """Use the Felis type system to get a SQLAlchemy datatype with variant
    overrides from the information in a `Column` object.

    Parameters
    ----------
    column_obj : `felis.datamodel.Column`
        The column object from which to get the datatype.

    Returns
    -------
    datatype : `sqlalchemy.types.TypeEngine`
        The SQLAlchemy datatype, including any dialect-specific variants.

    Raises
    ------
    ValueError
        If the column has a sized type but no length.
    """
    # Dialect-specific overrides (e.g. "mysql:datatype") for this column.
    variants = make_variant_dict(column_obj)
    type_name = column_obj.datatype.value
    felis_type = FelisType.felis_type(type_name)
    # The factory in ``sqltypes`` shares its name with the Felis datatype.
    type_factory = getattr(sqltypes, type_name)
    if not felis_type.is_sized:
        return type_factory(**variants)
    # Sized types (e.g. char/string) require an explicit length.
    if not column_obj.length:
        raise ValueError(f"Column {column_obj.name} has sized type '{column_obj.datatype}' but no length")
    return type_factory(column_obj.length, **variants)

136 

137 

class MetaDataBuilder:
    """A class for building a `MetaData` object from a Felis `Schema`."""

    def __init__(
        self, schema: Schema, apply_schema_to_metadata: bool = True, apply_schema_to_tables: bool = True
    ) -> None:
        """Initialize the metadata builder.

        Parameters
        ----------
        schema : `felis.datamodel.Schema`
            The schema object from which to build the SQLAlchemy metadata.
        apply_schema_to_metadata : `bool`, optional
            Whether to apply the schema name to the metadata object.
        apply_schema_to_tables : `bool`, optional
            Whether to apply the schema name to the tables.
        """
        self.schema = schema
        if not apply_schema_to_metadata:
            logger.debug("Schema name will not be applied to metadata")
        if not apply_schema_to_tables:
            logger.debug("Schema name will not be applied to tables")
        self.metadata = MetaData(schema=schema.name if apply_schema_to_metadata else None)
        # Maps Felis object IDs to the SQLAlchemy objects built from them so
        # that later build steps (constraints, indexes, primary keys) can
        # resolve references by ID.
        self._objects: dict[str, Any] = {}
        self.apply_schema_to_tables = apply_schema_to_tables

    def build(self) -> MetaData:
        """Build the SQLAlchemy tables and constraints from the schema.

        Returns
        -------
        metadata : `sqlalchemy.MetaData`
            The metadata object populated from the schema.
        """
        self.build_tables()
        # Constraints are built after the tables so that every referenced
        # column and table is already registered in ``self._objects``.
        self.build_constraints()
        return self.metadata

    def build_tables(self) -> None:
        """Build the SQLAlchemy tables from the schema.

        Notes
        -----
        This function builds all the tables by calling ``build_table`` on
        each Pydantic object. It also calls ``build_primary_key`` to create the
        primary key constraints.
        """
        for table in self.schema.tables:
            self.build_table(table)
            if table.primary_key:
                primary_key = self.build_primary_key(table.primary_key)
                # Attach the primary key to the table registered by
                # ``build_table`` just above.
                self._objects[table.id].append_constraint(primary_key)

    def build_primary_key(self, primary_key_columns: str | list[str]) -> PrimaryKeyConstraint:
        """Build a SQLAlchemy `PrimaryKeyConstraint` from a single column ID
        or a list.

        The `primary_key_columns` are strings or a list of strings representing
        IDs pointing to columns that will be looked up in the internal object
        dictionary.

        Parameters
        ----------
        primary_key_columns : `str` or `list` of `str`
            The column ID or list of column IDs from which to build the primary
            key.

        Returns
        -------
        primary_key: `sqlalchemy.PrimaryKeyConstraint`
            The SQLAlchemy primary key constraint object.
        """
        # ``ensure_iterable`` lets a single column ID be handled the same way
        # as a list of IDs.
        return PrimaryKeyConstraint(
            *[self._objects[column_id] for column_id in ensure_iterable(primary_key_columns)]
        )

    def build_table(self, table_obj: datamodel.Table) -> None:
        """Build a `sqlalchemy.Table` from a `felis.datamodel.Table` and add
        it to the `sqlalchemy.MetaData` object.

        Several MySQL table options are handled by annotations on the table,
        including the engine and charset. This is not needed for Postgres,
        which does not have equivalent options.

        Parameters
        ----------
        table_obj : `felis.datamodel.Table`
            The table object to build the SQLAlchemy table from.
        """
        # Process mysql table options.
        optargs = {}
        if table_obj.mysql_engine:
            optargs["mysql_engine"] = table_obj.mysql_engine
        if table_obj.mysql_charset:
            optargs["mysql_charset"] = table_obj.mysql_charset

        # Create the SQLAlchemy table object and its columns.
        name = table_obj.name
        id = table_obj.id
        description = table_obj.description
        columns = [self.build_column(column) for column in table_obj.columns]
        table = Table(
            name,
            self.metadata,
            *columns,
            comment=description,
            schema=self.schema.name if self.apply_schema_to_tables else None,
            **optargs,  # type: ignore[arg-type]
        )

        # Create the indexes and add them to the table.
        indexes = [self.build_index(index) for index in table_obj.indexes]
        for index in indexes:
            # NOTE(review): ``_set_parent`` is a private SQLAlchemy API,
            # apparently used here to associate the index with its table;
            # confirm there is no public equivalent for this SQLAlchemy
            # version.
            index._set_parent(table)
            table.indexes.add(index)

        # Register the table by ID for later constraint resolution.
        self._objects[id] = table

    def build_column(self, column_obj: datamodel.Column) -> Column:
        """Build a SQLAlchemy column from a `felis.datamodel.Column` object.

        Parameters
        ----------
        column_obj : `felis.datamodel.Column`
            The column object from which to build the SQLAlchemy column.

        Returns
        -------
        column: `sqlalchemy.Column`
            The SQLAlchemy column object.
        """
        # Get basic column attributes.
        name = column_obj.name
        id = column_obj.id
        description = column_obj.description
        # ``value`` from the schema becomes the column's server default.
        default = column_obj.value

        # Handle variant overrides for the column (e.g., "mysql:datatype").
        datatype = get_datatype_with_variants(column_obj)

        # Set default value of nullable based on column type and then whether
        # it was explicitly provided in the schema data. Numeric columns
        # default to NOT NULL; all others default to nullable.
        nullable = column_obj.nullable
        if nullable is None:
            nullable = False if isinstance(datatype, Numeric) else True

        # Set autoincrement depending on if it was provided explicitly;
        # "auto" is SQLAlchemy's default behavior.
        autoincrement: Literal["auto"] | bool = (
            column_obj.autoincrement if column_obj.autoincrement is not None else "auto"
        )

        column: Column = Column(
            name,
            datatype,
            comment=description,
            autoincrement=autoincrement,
            nullable=nullable,
            server_default=default,
        )

        # Register the column by ID for primary key and constraint lookups.
        self._objects[id] = column

        return column

    def build_constraints(self) -> None:
        """Build the SQLAlchemy constraints in the Felis schema and append them
        to the associated `Table`.

        Notes
        -----
        This is performed as a separate step after building the tables so that
        all the referenced objects in the constraints will be present and can
        be looked up by their ID.
        """
        for table_obj in self.schema.tables:
            table = self._objects[table_obj.id]
            for constraint_obj in table_obj.constraints:
                constraint = self.build_constraint(constraint_obj)
                table.append_constraint(constraint)

    def build_constraint(self, constraint_obj: datamodel.Constraint) -> Constraint:
        """Build a SQLAlchemy `Constraint` from a `felis.datamodel.Constraint`
        object.

        Parameters
        ----------
        constraint_obj : `felis.datamodel.Constraint`
            The constraint object from which to build the SQLAlchemy
            constraint.

        Returns
        -------
        constraint: `sqlalchemy.Constraint`
            The SQLAlchemy constraint object.

        Raises
        ------
        ValueError
            If the constraint type is not recognized.
        TypeError
            If the constraint object is not the expected type.
        """
        # Common keyword arguments; falsy schema values are normalized to
        # None. NOTE(review): the description is passed as ``info``, which
        # SQLAlchemy documents as an info dictionary — confirm a plain string
        # is intended here.
        args: dict[str, Any] = {
            "name": constraint_obj.name or None,
            "info": constraint_obj.description or None,
            "deferrable": constraint_obj.deferrable or None,
            "initially": constraint_obj.initially or None,
        }
        constraint: Constraint
        constraint_type = constraint_obj.type

        # Dispatch on the concrete Pydantic constraint type; column IDs are
        # resolved through the internal object dictionary.
        if isinstance(constraint_obj, datamodel.ForeignKeyConstraint):
            fk_obj: datamodel.ForeignKeyConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in fk_obj.columns]
            refcolumns = [self._objects[column_id] for column_id in fk_obj.referenced_columns]
            constraint = ForeignKeyConstraint(columns, refcolumns, **args)
        elif isinstance(constraint_obj, datamodel.CheckConstraint):
            check_obj: datamodel.CheckConstraint = constraint_obj
            expression = check_obj.expression
            constraint = CheckConstraint(expression, **args)
        elif isinstance(constraint_obj, datamodel.UniqueConstraint):
            uniq_obj: datamodel.UniqueConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in uniq_obj.columns]
            constraint = UniqueConstraint(*columns, **args)
        else:
            raise ValueError(f"Unknown constraint type: {constraint_type}")

        # Register the constraint by ID like every other built object.
        self._objects[constraint_obj.id] = constraint

        return constraint

    def build_index(self, index_obj: datamodel.Index) -> Index:
        """Build a SQLAlchemy `Index` from a `felis.datamodel.Index` object.

        Parameters
        ----------
        index_obj : `felis.datamodel.Index`
            The index object from which to build the SQLAlchemy index.

        Returns
        -------
        index: `sqlalchemy.Index`
            The SQLAlchemy index object.
        """
        # An index may be defined over columns (looked up by ID), raw SQL
        # expressions, or both.
        columns = [self._objects[c_id] for c_id in (index_obj.columns if index_obj.columns else [])]
        expressions = index_obj.expressions if index_obj.expressions else []
        index = Index(index_obj.name, *columns, *expressions)
        self._objects[index_obj.id] = index
        return index

381 

382 

class ConnectionWrapper:
    """A wrapper for a SQLAlchemy engine or mock connection which provides a
    consistent interface for executing SQL statements.
    """

    def __init__(self, engine: Engine | MockConnection):
        """Initialize the connection wrapper.

        Parameters
        ----------
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection to wrap.
        """
        self.engine = engine

    def execute(self, statement: Any) -> ResultProxy:
        """Execute a SQL statement on the engine and return the result."""
        # Plain strings must be wrapped before execution.
        stmt = text(statement) if isinstance(statement, str) else statement
        if isinstance(self.engine, MockConnection):
            # Mock connections are executed directly, with no transaction.
            return self.engine.connect().execute(stmt)
        # Real engines run inside a transaction that commits on success.
        with self.engine.begin() as connection:
            return connection.execute(stmt)

408 

409 

class DatabaseContext:
    """A class for managing the schema and its database connection."""

    def __init__(self, metadata: MetaData, engine: Engine | MockConnection):
        """Initialize the database context.

        Parameters
        ----------
        metadata : `sqlalchemy.MetaData`
            The SQLAlchemy metadata object.

        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection object.
        """
        self.engine = engine
        self.metadata = metadata
        # Wrap the engine so mock and real connections execute uniformly.
        self.connection = ConnectionWrapper(engine)

    def create_if_not_exists(self) -> None:
        """Create the schema in the database if it does not exist.

        In MySQL, this will create a new database. In PostgreSQL, it will
        create a new schema. For other variants, this is an unsupported
        operation.

        The schema (or database) name is taken from ``self.metadata``.

        Raises
        ------
        ValueError
            If the database type is not supported.
        SQLAlchemyError
            If there was an error creating the schema.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                # MySQL has no separate schema concept; create a database.
                logger.info(f"Creating MySQL database: {schema_name}")
                self.connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Creating PG schema: {schema_name}")
                self.connection.execute(sqa_schema.CreateSchema(schema_name, if_not_exists=True))
            else:
                # Message format kept consistent with ``drop_if_exists``.
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error creating schema: {e}")
            raise

    def drop_if_exists(self) -> None:
        """Drop the schema in the database if it exists.

        In MySQL, this will drop a database. In PostgreSQL, it will drop a
        schema. For other variants, this is unsupported for now.

        The schema (or database) name is taken from ``self.metadata``.

        Raises
        ------
        ValueError
            If the database type is not supported.
        SQLAlchemyError
            If there was an error dropping the schema.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                logger.info(f"Dropping MySQL database if exists: {schema_name}")
                self.connection.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Dropping PostgreSQL schema if exists: {schema_name}")
                self.connection.execute(sqa_schema.DropSchema(schema_name, if_exists=True))
            else:
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error dropping schema: {e}")
            raise

    def create_all(self) -> None:
        """Create all tables in the schema using the metadata object."""
        self.metadata.create_all(self.engine)

    @staticmethod
    def create_mock_engine(engine_url: URL, output_file: IO[str] | None = None) -> MockConnection:
        """Create a mock engine for testing or dumping DDL statements.

        Parameters
        ----------
        engine_url : `sqlalchemy.engine.url.URL`
            The SQLAlchemy engine URL.
        output_file : `typing.IO` [ `str` ] or `None`, optional
            The file to write the SQL statements to. If None, the statements
            will be written to stdout.

        Returns
        -------
        engine : `sqlalchemy.engine.mock.MockConnection`
            A mock connection whose executed statements are dumped by an
            `InsertDump` instance.
        """
        dumper = InsertDump(output_file)
        engine = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        # Give the dumper the engine's dialect so statements are compiled
        # for the target database before being printed.
        dumper.dialect = engine.dialect
        return engine