Coverage for python/felis/metadata.py: 17%
181 statements
coverage.py v7.5.0, created at 2024-05-01 15:16 -0700

# This file is part of felis.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import logging
from typing import IO, Any, Literal

import sqlalchemy.schema as sqa_schema
from lsst.utils.iteration import ensure_iterable
from sqlalchemy import (
    CheckConstraint,
    Column,
    Constraint,
    Engine,
    ForeignKeyConstraint,
    Index,
    MetaData,
    Numeric,
    PrimaryKeyConstraint,
    ResultProxy,
    Table,
    UniqueConstraint,
    create_mock_engine,
    make_url,
    text,
)
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.engine.mock import MockConnection
from sqlalchemy.engine.url import URL
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.types import TypeEngine

from felis.datamodel import Schema
from felis.db._variants import make_variant_dict

from . import datamodel
from .db import sqltypes
from .types import FelisType

logger = logging.getLogger(__name__)


class InsertDump:
    """A dumper for SQL statements which supports writing the statements to
    stdout or a file.
    """

    def __init__(self, file: IO[str] | None = None) -> None:
        """Initialize the insert dumper.

        Parameters
        ----------
        file : `typing.IO` [ `str` ] or `None`, optional
            The file to write the SQL statements to. If `None`, the statements
            will be written to stdout.
        """
        self.file = file
        self.dialect: Dialect | None = None

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Dump the SQL statement to a file or stdout.

        Statements with parameters will be formatted with the values
        inserted into the resultant SQL output.

        Parameters
        ----------
        sql : `typing.Any`
            The SQL statement to dump.
        multiparams : `typing.Any`
            The multiple parameter sets to use for the SQL statement.
        params : `typing.Any`
            The parameters to use for the SQL statement.
        """
        # Compile the statement using the dialect set by the mock engine.
        compiled = sql.compile(dialect=self.dialect)
        sql_str = str(compiled) + ";"
        params_list = [compiled.params]
        for params in params_list:
            # Statements without bound parameters are printed as-is.
            if not params:
                print(sql_str, file=self.file)
                continue
            # Render parameter values as SQL literals before substitution.
            new_params = {}
            for key, value in params.items():
                if isinstance(value, str):
                    new_params[key] = f"'{value}'"
                elif value is None:
                    new_params[key] = "null"
                else:
                    new_params[key] = value
            print(sql_str % new_params, file=self.file)
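
    # Illustration only (not part of the original module): for an insert such
    # as ``table.insert().values(name="abc", size=None)``, ``dump`` would emit
    # a line roughly like ``INSERT INTO mytable (name, size) VALUES ('abc', null);``,
    # with string values quoted and ``None`` rendered as ``null``. The exact
    # substitution depends on the dialect's paramstyle; "mytable" is a
    # hypothetical table name.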


def get_datatype_with_variants(column_obj: datamodel.Column) -> TypeEngine:
    """Use the Felis type system to get a SQLAlchemy datatype with variant
    overrides from the information in a `Column` object.

    Parameters
    ----------
    column_obj : `felis.datamodel.Column`
        The column object from which to get the datatype.

    Returns
    -------
    datatype : `sqlalchemy.types.TypeEngine`
        The SQLAlchemy datatype object with any variant overrides applied.

    Raises
    ------
    ValueError
        If the column has a sized type but no length.
    """
    variant_dict = make_variant_dict(column_obj)
    felis_type = FelisType.felis_type(column_obj.datatype.value)
    datatype_fun = getattr(sqltypes, column_obj.datatype.value)
    if felis_type.is_sized:
        if not column_obj.length:
            raise ValueError(f"Column {column_obj.name} has sized type '{column_obj.datatype}' but no length")
        datatype = datatype_fun(column_obj.length, **variant_dict)
    else:
        datatype = datatype_fun(**variant_dict)
    return datatype
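

# Illustration only (not part of the original module): a Felis column declared
# with ``datatype: char`` and ``length: 8`` would resolve here to the sized
# SQLAlchemy type produced by ``sqltypes.char(8)``, while any dialect-specific
# datatype overrides on the column are passed through ``make_variant_dict`` as
# keyword arguments and attached as variants.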


class MetaDataBuilder:
    """A class for building a `MetaData` object from a Felis `Schema`."""

    def __init__(
        self, schema: Schema, apply_schema_to_metadata: bool = True, apply_schema_to_tables: bool = True
    ) -> None:
        """Initialize the metadata builder.

        Parameters
        ----------
        schema : `felis.datamodel.Schema`
            The schema object from which to build the SQLAlchemy metadata.
        apply_schema_to_metadata : `bool`, optional
            Whether to apply the schema name to the metadata object.
        apply_schema_to_tables : `bool`, optional
            Whether to apply the schema name to the tables.
        """
        self.schema = schema
        if not apply_schema_to_metadata:
            logger.debug("Schema name will not be applied to metadata")
        if not apply_schema_to_tables:
            logger.debug("Schema name will not be applied to tables")
        self.metadata = MetaData(schema=schema.name if apply_schema_to_metadata else None)
        self._objects: dict[str, Any] = {}
        self.apply_schema_to_tables = apply_schema_to_tables

    def build(self) -> MetaData:
        """Build the SQLAlchemy tables and constraints from the schema.

        Returns
        -------
        metadata : `sqlalchemy.MetaData`
            The SQLAlchemy metadata object built from the schema.
        """
        self.build_tables()
        self.build_constraints()
        return self.metadata
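
    # Usage sketch (not part of the original module), assuming a validated
    # ``Schema`` instance named ``schema`` is already in hand:
    #
    #     builder = MetaDataBuilder(schema)
    #     metadata = builder.build()
    #
    # The resulting ``MetaData`` can then be passed to ``DatabaseContext`` below
    # to create the tables or dump DDL.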

    def build_tables(self) -> None:
        """Build the SQLAlchemy tables from the schema.

        Notes
        -----
        This function builds all the tables by calling ``build_table`` on
        each Pydantic table object. It also calls ``build_primary_key`` to
        create the primary key constraints.
        """
        for table in self.schema.tables:
            self.build_table(table)
            if table.primary_key:
                primary_key = self.build_primary_key(table.primary_key)
                self._objects[table.id].append_constraint(primary_key)

    def build_primary_key(self, primary_key_columns: str | list[str]) -> PrimaryKeyConstraint:
        """Build a SQLAlchemy `PrimaryKeyConstraint` from a single column ID
        or a list of column IDs.

        The ``primary_key_columns`` argument is a single string or a list of
        strings representing IDs of columns that will be looked up in the
        internal object dictionary.

        Parameters
        ----------
        primary_key_columns : `str` or `list` of `str`
            The column ID or list of column IDs from which to build the
            primary key.

        Returns
        -------
        primary_key : `sqlalchemy.PrimaryKeyConstraint`
            The SQLAlchemy primary key constraint object.
        """
        return PrimaryKeyConstraint(
            *[self._objects[column_id] for column_id in ensure_iterable(primary_key_columns)]
        )
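
    # Illustration only (not part of the original module): if a table's
    # ``primary_key`` is a single column ID, the constraint wraps one column;
    # if it is a list of two IDs, a composite primary key is built from both
    # columns. ``ensure_iterable`` handles both forms uniformly.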

    def build_table(self, table_obj: datamodel.Table) -> None:
        """Build a `sqlalchemy.Table` from a `felis.datamodel.Table` and add
        it to the `sqlalchemy.MetaData` object.

        Several MySQL table options are handled by annotations on the table,
        including the engine and charset. This is not needed for PostgreSQL,
        which does not have equivalent options.

        Parameters
        ----------
        table_obj : `felis.datamodel.Table`
            The table object from which to build the SQLAlchemy table.
        """
        # Process MySQL table options.
        optargs = {}
        if table_obj.mysql_engine:
            optargs["mysql_engine"] = table_obj.mysql_engine
        if table_obj.mysql_charset:
            optargs["mysql_charset"] = table_obj.mysql_charset

        # Create the SQLAlchemy table object and its columns.
        name = table_obj.name
        id = table_obj.id
        description = table_obj.description
        columns = [self.build_column(column) for column in table_obj.columns]
        table = Table(
            name,
            self.metadata,
            *columns,
            comment=description,
            schema=self.schema.name if self.apply_schema_to_tables else None,
            **optargs,  # type: ignore[arg-type]
        )

        # Create the indexes and add them to the table.
        indexes = [self.build_index(index) for index in table_obj.indexes]
        for index in indexes:
            index._set_parent(table)
            table.indexes.add(index)

        self._objects[id] = table

    def build_column(self, column_obj: datamodel.Column) -> Column:
        """Build a SQLAlchemy column from a `felis.datamodel.Column` object.

        Parameters
        ----------
        column_obj : `felis.datamodel.Column`
            The column object from which to build the SQLAlchemy column.

        Returns
        -------
        column : `sqlalchemy.Column`
            The SQLAlchemy column object.
        """
        # Get basic column attributes.
        name = column_obj.name
        id = column_obj.id
        description = column_obj.description
        default = column_obj.value

        # Handle variant overrides for the column (e.g., "mysql:datatype").
        datatype = get_datatype_with_variants(column_obj)

        # Use the explicit nullable value from the schema data if provided;
        # otherwise default to non-nullable for numeric types and nullable for
        # everything else.
        nullable = column_obj.nullable
        if nullable is None:
            nullable = False if isinstance(datatype, Numeric) else True

        # Set autoincrement depending on whether it was provided explicitly.
        autoincrement: Literal["auto"] | bool = (
            column_obj.autoincrement if column_obj.autoincrement is not None else "auto"
        )

        column: Column = Column(
            name,
            datatype,
            comment=description,
            autoincrement=autoincrement,
            nullable=nullable,
            server_default=default,
        )

        self._objects[id] = column

        return column
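
    # Illustration only (not part of the original module): a Felis column with
    # ``datatype: double`` and no explicit ``nullable`` field becomes a
    # non-nullable ``Column`` (numeric types default to NOT NULL here), while a
    # ``char`` column with no ``nullable`` field defaults to nullable.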

    def build_constraints(self) -> None:
        """Build the SQLAlchemy constraints in the Felis schema and append
        them to the associated `Table`.

        Notes
        -----
        This is performed as a separate step after building the tables so that
        all the referenced objects in the constraints will be present and can
        be looked up by their ID.
        """
        for table_obj in self.schema.tables:
            table = self._objects[table_obj.id]
            for constraint_obj in table_obj.constraints:
                constraint = self.build_constraint(constraint_obj)
                table.append_constraint(constraint)

    def build_constraint(self, constraint_obj: datamodel.Constraint) -> Constraint:
        """Build a SQLAlchemy `Constraint` from a `felis.datamodel.Constraint`
        object.

        Parameters
        ----------
        constraint_obj : `felis.datamodel.Constraint`
            The constraint object from which to build the SQLAlchemy
            constraint.

        Returns
        -------
        constraint : `sqlalchemy.Constraint`
            The SQLAlchemy constraint object.

        Raises
        ------
        ValueError
            If the constraint type is not recognized.
        TypeError
            If the constraint object is not the expected type.
        """
        args: dict[str, Any] = {
            "name": constraint_obj.name or None,
            "info": constraint_obj.description or None,
            "deferrable": constraint_obj.deferrable or None,
            "initially": constraint_obj.initially or None,
        }
        constraint: Constraint
        constraint_type = constraint_obj.type

        if isinstance(constraint_obj, datamodel.ForeignKeyConstraint):
            fk_obj: datamodel.ForeignKeyConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in fk_obj.columns]
            refcolumns = [self._objects[column_id] for column_id in fk_obj.referenced_columns]
            constraint = ForeignKeyConstraint(columns, refcolumns, **args)
        elif isinstance(constraint_obj, datamodel.CheckConstraint):
            check_obj: datamodel.CheckConstraint = constraint_obj
            expression = check_obj.expression
            constraint = CheckConstraint(expression, **args)
        elif isinstance(constraint_obj, datamodel.UniqueConstraint):
            uniq_obj: datamodel.UniqueConstraint = constraint_obj
            columns = [self._objects[column_id] for column_id in uniq_obj.columns]
            constraint = UniqueConstraint(*columns, **args)
        else:
            raise ValueError(f"Unknown constraint type: {constraint_type}")

        self._objects[constraint_obj.id] = constraint

        return constraint
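
    # Illustration only (not part of the original module): a
    # ``datamodel.UniqueConstraint`` whose ``columns`` hold the IDs of two
    # already-built columns is turned into
    # ``UniqueConstraint(col_a, col_b, name=..., info=..., deferrable=..., initially=...)``,
    # with the column objects looked up from ``self._objects`` by ID.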

    def build_index(self, index_obj: datamodel.Index) -> Index:
        """Build a SQLAlchemy `Index` from a `felis.datamodel.Index` object.

        Parameters
        ----------
        index_obj : `felis.datamodel.Index`
            The index object from which to build the SQLAlchemy index.

        Returns
        -------
        index : `sqlalchemy.Index`
            The SQLAlchemy index object.
        """
        columns = [self._objects[c_id] for c_id in (index_obj.columns if index_obj.columns else [])]
        expressions = index_obj.expressions if index_obj.expressions else []
        index = Index(index_obj.name, *columns, *expressions)
        self._objects[index_obj.id] = index
        return index


class ConnectionWrapper:
    """A wrapper for a SQLAlchemy engine or mock connection which provides a
    consistent interface for executing SQL statements.
    """

    def __init__(self, engine: Engine | MockConnection):
        """Initialize the connection wrapper.

        Parameters
        ----------
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection to wrap.
        """
        self.engine = engine

    def execute(self, statement: Any) -> ResultProxy:
        """Execute a SQL statement on the engine and return the result."""
        if isinstance(statement, str):
            statement = text(statement)
        if isinstance(self.engine, MockConnection):
            return self.engine.connect().execute(statement)
        else:
            with self.engine.begin() as connection:
                result = connection.execute(statement)
                return result
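
    # Usage sketch (not part of the original module): the wrapper accepts
    # either plain strings or SQLAlchemy constructs, e.g.
    #
    #     wrapper = ConnectionWrapper(engine)
    #     wrapper.execute("SELECT 1")  # wrapped in text() automatically
    #     wrapper.execute(sqa_schema.CreateSchema("myschema"))  # executed as-is
    #
    # where "myschema" is a hypothetical schema name.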


class DatabaseContext:
    """A class for managing the schema and its database connection."""

    def __init__(self, metadata: MetaData, engine: Engine | MockConnection):
        """Initialize the database context.

        Parameters
        ----------
        metadata : `sqlalchemy.MetaData`
            The SQLAlchemy metadata object.
        engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
            The SQLAlchemy engine or mock connection object.
        """
        self.engine = engine
        self.metadata = metadata
        self.connection = ConnectionWrapper(engine)

    def create_if_not_exists(self) -> None:
        """Create the schema in the database if it does not exist.

        The database type is determined from the engine's dialect, and the
        schema (or database) name is taken from the metadata object.

        In MySQL, this will create a new database. In PostgreSQL, it will
        create a new schema. For other database types, this is an unsupported
        operation.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                logger.info(f"Creating MySQL database: {schema_name}")
                self.connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Creating PostgreSQL schema: {schema_name}")
                self.connection.execute(sqa_schema.CreateSchema(schema_name, if_not_exists=True))
            else:
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error creating schema: {e}")
            raise
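
    # Illustration only (not part of the original module): with a MySQL engine
    # and ``metadata.schema`` set to "mydb" (a hypothetical name), this issues
    # ``CREATE DATABASE IF NOT EXISTS mydb``; with a PostgreSQL engine it emits
    # the equivalent ``CREATE SCHEMA IF NOT EXISTS mydb`` via
    # ``sqlalchemy.schema.CreateSchema``.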

    def drop_if_exists(self) -> None:
        """Drop the schema in the database if it exists.

        The database type is determined from the engine's dialect, and the
        schema (or database) name is taken from the metadata object.

        In MySQL, this will drop a database. In PostgreSQL, it will drop a
        schema. For other database types, this is unsupported for now.
        """
        db_type = self.engine.dialect.name
        schema_name = self.metadata.schema
        try:
            if db_type == "mysql":
                logger.info(f"Dropping MySQL database if exists: {schema_name}")
                self.connection.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
            elif db_type == "postgresql":
                logger.info(f"Dropping PostgreSQL schema if exists: {schema_name}")
                self.connection.execute(sqa_schema.DropSchema(schema_name, if_exists=True))
            else:
                raise ValueError(f"Unsupported database type: {db_type}")
        except SQLAlchemyError as e:
            logger.error(f"Error dropping schema: {e}")
            raise

    def create_all(self) -> None:
        """Create all tables in the schema using the metadata object."""
        self.metadata.create_all(self.engine)

    @staticmethod
    def create_mock_engine(engine_url: URL, output_file: IO[str] | None = None) -> MockConnection:
        """Create a mock engine for testing or dumping DDL statements.

        Parameters
        ----------
        engine_url : `sqlalchemy.engine.url.URL`
            The SQLAlchemy engine URL.
        output_file : `typing.IO` [ `str` ] or `None`, optional
            The file to write the SQL statements to. If `None`, the statements
            will be written to stdout.

        Returns
        -------
        engine : `sqlalchemy.engine.mock.MockConnection`
            The mock connection which writes statements using `InsertDump`.
        """
        dumper = InsertDump(output_file)
        engine = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        dumper.dialect = engine.dialect
        return engine
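
    # Usage sketch (not part of the original module), assuming ``metadata`` was
    # produced by ``MetaDataBuilder.build()``:
    #
    #     engine = DatabaseContext.create_mock_engine(make_url("postgresql://"))
    #     ctx = DatabaseContext(metadata, engine)
    #     ctx.create_all()  # prints the generated DDL to stdout via InsertDump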