Coverage for python/felis/metadata.py: 17%
187 statements
coverage.py v7.5.1, created at 2024-05-14 09:10 +0000
# This file is part of felis.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import logging
from typing import IO, Any, Literal

import sqlalchemy.schema as sqa_schema
from lsst.utils.iteration import ensure_iterable
from sqlalchemy import (
    CheckConstraint,
    Column,
    Constraint,
    Engine,
    ForeignKeyConstraint,
    Index,
    MetaData,
    PrimaryKeyConstraint,
    ResultProxy,
    Table,
    TextClause,
    UniqueConstraint,
    create_mock_engine,
    make_url,
    text,
)
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.engine.mock import MockConnection
from sqlalchemy.engine.url import URL
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.types import TypeEngine

from felis.datamodel import Schema
from felis.db._variants import make_variant_dict

from . import datamodel
from .db import sqltypes
from .types import FelisType

logger = logging.getLogger(__name__)


class InsertDump:
    """An insert dumper for SQL statements which supports writing messages
    to stdout or a file.
    """

    def __init__(self, file: IO[str] | None = None) -> None:
        """Initialize the insert dumper.

        Parameters
        ----------
        file : `io.TextIOBase` or `None`, optional
            The file to write the SQL statements to. If `None`, the
            statements will be written to stdout.
        """
        self.file = file
        self.dialect: Dialect | None = None

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Dump the SQL statement to a file or stdout.

        Statements with parameters will be formatted with the values
        inserted into the resultant SQL output.

        Parameters
        ----------
        sql : `typing.Any`
            The SQL statement to dump.
        multiparams : `typing.Any`
            The multiparams to use for the SQL statement.
        params : `typing.Any`
            The params to use for the SQL statement.
        """
        compiled = sql.compile(dialect=self.dialect)
        sql_str = str(compiled) + ";"
        params_list = [compiled.params]
        for params in params_list:
            if not params:
                print(sql_str, file=self.file)
                continue
            new_params = {}
            for key, value in params.items():
                if isinstance(value, str):
                    new_params[key] = f"'{value}'"
                elif value is None:
                    new_params[key] = "null"
                else:
                    new_params[key] = value
            print(sql_str % new_params, file=self.file)


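# A minimal usage sketch for InsertDump (illustrative only: the example table
# and the choice of a pyformat dialect such as PostgreSQL are assumptions, not
# part of this module):
#
#     from sqlalchemy import Integer, insert
#     from sqlalchemy.dialects import postgresql
#
#     meta = MetaData()
#     tbl = Table("example", meta, Column("id", Integer))
#     dumper = InsertDump()
#     dumper.dialect = postgresql.dialect()
#     dumper.dump(insert(tbl).values(id=1))
#     # prints something like: INSERT INTO example (id) VALUES (1);

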
def get_datatype_with_variants(column_obj: datamodel.Column) -> TypeEngine:
    """Use the Felis type system to get a SQLAlchemy datatype with variant
    overrides from the information in a `Column` object.

    Parameters
    ----------
    column_obj : `felis.datamodel.Column`
        The column object from which to get the datatype.

    Returns
    -------
    datatype : `sqlalchemy.types.TypeEngine`
        The SQLAlchemy datatype object, including any dialect variants.

    Raises
    ------
    ValueError
        If the column has a sized type but no length.
    """
    variant_dict = make_variant_dict(column_obj)
    felis_type = FelisType.felis_type(column_obj.datatype.value)
    datatype_fun = getattr(sqltypes, column_obj.datatype.value)
    if felis_type.is_sized:
        if not column_obj.length:
            raise ValueError(f"Column {column_obj.name} has sized type '{column_obj.datatype}' but no length")
        datatype = datatype_fun(column_obj.length, **variant_dict)
    else:
        datatype = datatype_fun(**variant_dict)
    return datatype


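# A rough sketch of what the helper above does for a sized type (assuming
# "char" is a valid Felis datatype name; the length value is illustrative and
# the variant overrides come from felis.db._variants.make_variant_dict):
#
#     felis_type = FelisType.felis_type("char")  # a sized type
#     datatype_fun = getattr(sqltypes, "char")   # felis.db.sqltypes.char
#     datatype = datatype_fun(8)                 # plus any dialect variant overrides

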
_VALID_SERVER_DEFAULTS = ("CURRENT_TIMESTAMP", "NOW()", "LOCALTIMESTAMP", "NULL")


class MetaDataBuilder:
    """A class for building a `MetaData` object from a Felis `Schema`."""

    def __init__(
        self, schema: Schema, apply_schema_to_metadata: bool = True, apply_schema_to_tables: bool = True
    ) -> None:
        """Initialize the metadata builder.

        Parameters
        ----------
        schema : `felis.datamodel.Schema`
            The schema object from which to build the SQLAlchemy metadata.
        apply_schema_to_metadata : `bool`, optional
            Whether to apply the schema name to the metadata object.
        apply_schema_to_tables : `bool`, optional
            Whether to apply the schema name to the tables.
        """
        self.schema = schema
        if not apply_schema_to_metadata:
            logger.debug("Schema name will not be applied to metadata")
        if not apply_schema_to_tables:
            logger.debug("Schema name will not be applied to tables")
        self.metadata = MetaData(schema=schema.name if apply_schema_to_metadata else None)
        self._objects: dict[str, Any] = {}
        self.apply_schema_to_tables = apply_schema_to_tables

    def build(self) -> MetaData:
        """Build the SQLAlchemy tables and constraints from the schema."""
        self.build_tables()
        self.build_constraints()
        return self.metadata

    def build_tables(self) -> None:
        """Build the SQLAlchemy tables from the schema.

        Notes
        -----
        This function builds all the tables by calling ``build_table`` on
        each Pydantic object. It also calls ``build_primary_key`` to create
        the primary key constraints.
        """
        for table in self.schema.tables:
            self.build_table(table)
            if table.primary_key:
                primary_key = self.build_primary_key(table.primary_key)
                self._objects[table.id].append_constraint(primary_key)

    def build_primary_key(self, primary_key_columns: str | list[str]) -> PrimaryKeyConstraint:
        """Build a SQLAlchemy `PrimaryKeyConstraint` from a single column ID
        or a list of column IDs.

        The ``primary_key_columns`` argument contains one or more string IDs,
        which are looked up in the internal object dictionary to find the
        corresponding columns.

        Parameters
        ----------
        primary_key_columns : `str` or `list` of `str`
            The column ID or list of column IDs from which to build the
            primary key.

        Returns
        -------
        primary_key : `sqlalchemy.PrimaryKeyConstraint`
            The SQLAlchemy primary key constraint object.
        """
        return PrimaryKeyConstraint(
            *[self._objects[column_id] for column_id in ensure_iterable(primary_key_columns)]
        )

    def build_table(self, table_obj: datamodel.Table) -> None:
        """Build a `sqlalchemy.Table` from a `felis.datamodel.Table` and add
        it to the `sqlalchemy.MetaData` object.

        Several MySQL table options are handled by annotations on the table,
        including the engine and charset. This is not needed for Postgres,
        which does not have equivalent options.

        Parameters
        ----------
        table_obj : `felis.datamodel.Table`
            The table object to build the SQLAlchemy table from.
        """
        # Process mysql table options.
        optargs = {}
        if table_obj.mysql_engine:
            optargs["mysql_engine"] = table_obj.mysql_engine
        if table_obj.mysql_charset:
            optargs["mysql_charset"] = table_obj.mysql_charset

        # Create the SQLAlchemy table object and its columns.
        name = table_obj.name
        id = table_obj.id
        description = table_obj.description
        columns = [self.build_column(column) for column in table_obj.columns]
        table = Table(
            name,
            self.metadata,
            *columns,
            comment=description,
            schema=self.schema.name if self.apply_schema_to_tables else None,
            **optargs,  # type: ignore[arg-type]
        )

        # Create the indexes and add them to the table.
        indexes = [self.build_index(index) for index in table_obj.indexes]
        for index in indexes:
            index._set_parent(table)
            table.indexes.add(index)

        self._objects[id] = table

    def build_column(self, column_obj: datamodel.Column) -> Column:
        """Build a SQLAlchemy column from a `felis.datamodel.Column` object.

        Parameters
        ----------
        column_obj : `felis.datamodel.Column`
            The column object from which to build the SQLAlchemy column.

        Returns
        -------
        column : `sqlalchemy.Column`
            The SQLAlchemy column object.
        """
        # Get basic column attributes.
        name = column_obj.name
        id = column_obj.id
        description = column_obj.description
        value = column_obj.value
        nullable = column_obj.nullable

        # Get the datatype, handling variant overrides such as "mysql:datatype".
        datatype = get_datatype_with_variants(column_obj)

        # Set autoincrement, depending on whether it was provided explicitly.
        autoincrement: Literal["auto"] | bool = (
            column_obj.autoincrement if column_obj.autoincrement is not None else "auto"
        )

        server_default: str | TextClause | None = None
        if value is not None:
            server_default = str(value)
            if server_default in _VALID_SERVER_DEFAULTS or not isinstance(value, str):
                # If the server default is a valid keyword or not a string,
                # wrap it in text() so it is emitted unquoted in the DDL.
                server_default = text(server_default)
        if server_default is not None:
            logger.debug(f"Column '{id}' has default value: {server_default}")

        column: Column = Column(
            name,
            datatype,
            comment=description,
            autoincrement=autoincrement,
            nullable=nullable,
            server_default=server_default,
        )

        self._objects[id] = column

        return column

    def build_constraints(self) -> None:
        """Build the SQLAlchemy constraints in the Felis schema and append
        them to the associated `Table`.

        Notes
        -----
        This is performed as a separate step after building the tables so
        that all the referenced objects in the constraints will be present
        and can be looked up by their ID.
        """
        for table_obj in self.schema.tables:
            table = self._objects[table_obj.id]
            for constraint_obj in table_obj.constraints:
                constraint = self.build_constraint(constraint_obj)
                table.append_constraint(constraint)

    def build_constraint(self, constraint_obj: datamodel.Constraint) -> Constraint:
        """Build a SQLAlchemy `Constraint` from a `felis.datamodel.Constraint`
        object.

        Parameters
        ----------
        constraint_obj : `felis.datamodel.Constraint`
            The constraint object from which to build the SQLAlchemy
            constraint.

        Returns
        -------
        constraint : `sqlalchemy.Constraint`
            The SQLAlchemy constraint object.

        Raises
        ------
        ValueError
            If the constraint type is not recognized.
        TypeError
            If the constraint object is not the expected type.
        """
        args: dict[str, Any] = {
            "name": constraint_obj.name or None,
            "comment": constraint_obj.description or None,
            "deferrable": constraint_obj.deferrable or None,
            "initially": constraint_obj.initially or None,
        }
        constraint: Constraint
        constraint_type = constraint_obj.type

        if isinstance(constraint_obj, datamodel.ForeignKeyConstraint):
            fk_obj: datamodel.ForeignKeyConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in fk_obj.columns]
            refcolumns = [self._objects[column_id] for column_id in fk_obj.referenced_columns]
            constraint = ForeignKeyConstraint(columns, refcolumns, **args)
        elif isinstance(constraint_obj, datamodel.CheckConstraint):
            check_obj: datamodel.CheckConstraint = constraint_obj
            expression = check_obj.expression
            constraint = CheckConstraint(expression, **args)
        elif isinstance(constraint_obj, datamodel.UniqueConstraint):
            uniq_obj: datamodel.UniqueConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in uniq_obj.columns]
            constraint = UniqueConstraint(*columns, **args)
        else:
            raise ValueError(f"Unknown constraint type: {constraint_type}")

        self._objects[constraint_obj.id] = constraint

        return constraint

    def build_index(self, index_obj: datamodel.Index) -> Index:
        """Build a SQLAlchemy `Index` from a `felis.datamodel.Index` object.

        Parameters
        ----------
        index_obj : `felis.datamodel.Index`
            The index object from which to build the SQLAlchemy index.

        Returns
        -------
        index : `sqlalchemy.Index`
            The SQLAlchemy index object.
        """
        columns = [self._objects[c_id] for c_id in (index_obj.columns if index_obj.columns else [])]
        expressions = index_obj.expressions if index_obj.expressions else []
        index = Index(index_obj.name, *columns, *expressions)
        self._objects[index_obj.id] = index
        return index


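# A minimal usage sketch for MetaDataBuilder (assumes `schema` is an
# already-validated `felis.datamodel.Schema`; loading and validating the
# schema happen outside this module):
#
#     builder = MetaDataBuilder(schema, apply_schema_to_tables=False)
#     metadata = builder.build()
#     for table in metadata.tables.values():
#         print(table.name)

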
class ConnectionWrapper:
    """A wrapper for a SQLAlchemy engine or mock connection which provides a
    consistent interface for executing SQL statements.
    """

    def __init__(self, engine: Engine | MockConnection):
        """Initialize the connection wrapper.

        Parameters
        ----------
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection to wrap.
        """
        self.engine = engine

    def execute(self, statement: Any) -> ResultProxy:
        """Execute a SQL statement on the engine and return the result."""
        if isinstance(statement, str):
            statement = text(statement)
        if isinstance(self.engine, MockConnection):
            return self.engine.connect().execute(statement)
        else:
            with self.engine.begin() as connection:
                result = connection.execute(statement)
                return result


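# A minimal usage sketch for ConnectionWrapper with a real engine (the
# in-memory SQLite engine is an assumption for illustration):
#
#     from sqlalchemy import create_engine
#
#     wrapper = ConnectionWrapper(create_engine("sqlite://"))
#     wrapper.execute("CREATE TABLE t (id INTEGER)")
#     wrapper.execute("INSERT INTO t (id) VALUES (1)")

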
class DatabaseContext:
    """A class for managing the schema and its database connection."""

    def __init__(self, metadata: MetaData, engine: Engine | MockConnection):
        """Initialize the database context.

        Parameters
        ----------
        metadata : `sqlalchemy.MetaData`
            The SQLAlchemy metadata object.
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection object.
        """
        self.engine = engine
        self.metadata = metadata
        self.connection = ConnectionWrapper(engine)

    def create_if_not_exists(self) -> None:
        """Create the schema in the database if it does not exist.

        In MySQL, this will create a new database. In PostgreSQL, it will
        create a new schema. For other database variants, this is an
        unsupported operation.

        The schema (or database) name is taken from the metadata object, and
        the database type is determined from the engine dialect.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                logger.info(f"Creating MySQL database: {schema_name}")
                self.connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Creating PostgreSQL schema: {schema_name}")
                self.connection.execute(sqa_schema.CreateSchema(schema_name, if_not_exists=True))
            else:
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error creating schema: {e}")
            raise

    def drop_if_exists(self) -> None:
        """Drop the schema in the database if it exists.

        In MySQL, this will drop a database. In PostgreSQL, it will drop a
        schema. For other database variants, this is unsupported for now.

        The schema (or database) name is taken from the metadata object, and
        the database type is determined from the engine dialect.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                logger.info(f"Dropping MySQL database if exists: {schema_name}")
                self.connection.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Dropping PostgreSQL schema if exists: {schema_name}")
                self.connection.execute(sqa_schema.DropSchema(schema_name, if_exists=True, cascade=True))
            else:
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error dropping schema: {e}")
            raise

    def create_all(self) -> None:
        """Create all tables in the schema using the metadata object."""
        self.metadata.create_all(self.engine)

    @staticmethod
    def create_mock_engine(engine_url: URL, output_file: IO[str] | None = None) -> MockConnection:
        """Create a mock engine for testing or dumping DDL statements.

        Parameters
        ----------
        engine_url : `sqlalchemy.engine.url.URL`
            The SQLAlchemy engine URL.
        output_file : `typing.IO` [ `str` ] or `None`, optional
            The file to write the SQL statements to. If `None`, the
            statements will be written to stdout.

        Returns
        -------
        engine : `sqlalchemy.engine.mock.MockConnection`
            The mock connection, which passes statements to the dumper
            instead of executing them.
        """
        dumper = InsertDump(output_file)
        engine = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        dumper.dialect = engine.dialect
        return engine
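

# A minimal usage sketch for dumping DDL without a live database (the
# PostgreSQL URL is an assumption; `metadata` would typically come from
# MetaDataBuilder.build()):
#
#     engine = DatabaseContext.create_mock_engine(make_url("postgresql://"))
#     ctx = DatabaseContext(metadata, engine)
#     ctx.create_all()  # emits CREATE TABLE statements instead of executing them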