Coverage for python/lsst/dax/apdb/apdbSqlSchema.py: 18% of 239 statements (coverage.py v7.4.4, created at 2024-03-22 02:30 -0700)
# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module responsible for APDB schema operations."""

from __future__ import annotations

__all__ = ["ApdbSqlSchema", "ExtraTables"]

import enum
import logging
import uuid
from collections.abc import Mapping
from typing import Any

import felis.types
import sqlalchemy
from felis import simple
from sqlalchemy import (
    DDL,
    Column,
    ForeignKeyConstraint,
    Index,
    MetaData,
    PrimaryKeyConstraint,
    Table,
    UniqueConstraint,
    event,
    inspect,
)
from sqlalchemy.dialects.postgresql import UUID

from .apdbSchema import ApdbSchema, ApdbTables

_LOG = logging.getLogger(__name__)
57#
58# Copied from daf_butler.
59#
60class GUID(sqlalchemy.TypeDecorator):
61 """Platform-independent GUID type.
63 Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
64 stringified hex values.
65 """
67 impl = sqlalchemy.CHAR
69 cache_ok = True
71 def load_dialect_impl(self, dialect: sqlalchemy.engine.Dialect) -> sqlalchemy.types.TypeEngine:
72 if dialect.name == "postgresql":
73 return dialect.type_descriptor(UUID())
74 else:
75 return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.engine.Dialect) -> str | None:
        if value is None:
            return value

        # Coerce input to UUID type. In general a UUID is the only thing
        # that we want on input, but some existing code passes ints.
        if isinstance(value, int):
            value = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            value = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            value = uuid.UUID(hex=value)
        elif not isinstance(value, uuid.UUID):
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        else:
            return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.engine.Dialect
    ) -> uuid.UUID | None:
        if value is None:
            return value
        elif isinstance(value, uuid.UUID):
            # sqlalchemy 2 converts to UUID internally
            return value
        else:
            return uuid.UUID(hex=value)
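
# A minimal usage sketch for GUID (hedged: the in-memory engine URL and the
# "demo" table below are illustrative assumptions, not part of this module):
#
#     engine = sqlalchemy.create_engine("sqlite://")
#     metadata = sqlalchemy.MetaData()
#     demo = Table("demo", metadata, Column("id", GUID, primary_key=True))
#     metadata.create_all(engine)
#     with engine.begin() as connection:
#         connection.execute(demo.insert().values(id=uuid.uuid4()))
#         value = connection.execute(demo.select()).scalar()
#     # On SQLite the value round-trips through CHAR(32); on PostgreSQL it
#     # uses the native UUID type. Either way a uuid.UUID comes back.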


class InconsistentSchemaError(RuntimeError):
    """Exception raised when schema state is inconsistent."""


@enum.unique
class ExtraTables(enum.Enum):
    """Names of the tables used for tracking insert IDs."""

    DiaInsertId = "DiaInsertId"
    """Name of the table for insert ID records."""

    DiaObjectInsertId = "DiaObjectInsertId"
    """Name of the table for DIAObject insert ID records."""

    DiaSourceInsertId = "DiaSourceInsertId"
    """Name of the table for DIASource insert ID records."""

    DiaForcedSourceInsertId = "DiaFSourceInsertId"
    """Name of the table for DIAForcedSource insert ID records."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name."""
        return prefix + self.value

    @classmethod
    def insert_id_tables(cls) -> Mapping[ExtraTables, ApdbTables]:
        """Return mapping of tables used for insert ID tracking to their
        corresponding regular tables.
        """
        return {
            cls.DiaObjectInsertId: ApdbTables.DiaObject,
            cls.DiaSourceInsertId: ApdbTables.DiaSource,
            cls.DiaForcedSourceInsertId: ApdbTables.DiaForcedSource,
        }
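
# For illustration: each tracking table maps to the APDB table whose inserts
# it records, and table_name() simply prepends the configured prefix, e.g.
#
#     >>> ExtraTables.insert_id_tables()[ExtraTables.DiaSourceInsertId]
#     <ApdbTables.DiaSource: ...>
#     >>> ExtraTables.DiaForcedSourceInsertId.table_name("pfx_")
#     'pfx_DiaFSourceInsertId'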


class ApdbSqlSchema(ApdbSchema):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    has_insert_id : `bool`
        If `True` then schema has tables for insert ID tracking.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbSqlConfig.dia_object_index`
        for details.
    htm_index_column : `str`
        Name of a HTM index column for DiaObject and DiaSource tables.
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    prefix : `str`, optional
        Prefix to add to all schema elements.
    namespace : `str`, optional
        Namespace (or schema name) to use for all APDB tables.
    use_insert_id : `bool`, optional
        If `True` then the schema includes tables for tracking insert IDs.
    """

    pixel_id_tables = (ApdbTables.DiaObject, ApdbTables.DiaObjectLast, ApdbTables.DiaSource)
    """Tables that need pixelId column for spatial indexing."""

    def __init__(
        self,
        engine: sqlalchemy.engine.Engine,
        dia_object_index: str,
        htm_index_column: str,
        schema_file: str,
        schema_name: str = "ApdbSchema",
        prefix: str = "",
        namespace: str | None = None,
        use_insert_id: bool = False,
    ):
        super().__init__(schema_file, schema_name)

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._htm_index_column = htm_index_column
        self._prefix = prefix
        self._use_insert_id = use_insert_id

        self._metadata = MetaData(schema=namespace)

        # map YAML column types to SQLAlchemy
        self._type_map = {
            felis.types.Double: self._getDoubleType(engine),
            felis.types.Float: sqlalchemy.types.Float,
            felis.types.Timestamp: sqlalchemy.types.TIMESTAMP,
            felis.types.Long: sqlalchemy.types.BigInteger,
            felis.types.Int: sqlalchemy.types.Integer,
            felis.types.Short: sqlalchemy.types.Integer,
            felis.types.Byte: sqlalchemy.types.Integer,
            felis.types.Binary: sqlalchemy.types.LargeBinary,
            felis.types.Text: sqlalchemy.types.Text,
            felis.types.String: sqlalchemy.types.CHAR,
            felis.types.Char: sqlalchemy.types.CHAR,
            felis.types.Unicode: sqlalchemy.types.CHAR,
            felis.types.Boolean: sqlalchemy.types.Boolean,
        }

        # Add pixelId column and index to tables that need it
        for table in self.pixel_id_tables:
            tableDef = self.tableSchemas.get(table)
            if not tableDef:
                continue
            column = simple.Column(
                id=f"#{htm_index_column}",
                name=htm_index_column,
                datatype=felis.types.Long,
                nullable=False,
                value=None,
                description="Pixelization index column.",
                table=tableDef,
            )
            tableDef.columns.append(column)

            # Adjust index if needed
            if table == ApdbTables.DiaObject and self._dia_object_index == "pix_id_iov":
                tableDef.primary_key.insert(0, column)

            if table is ApdbTables.DiaObjectLast:
                # use it as a leading PK column
                tableDef.primary_key.insert(0, column)
            else:
                # make a regular index
                name = f"IDX_{tableDef.name}_{htm_index_column}"
                index = simple.Index(id=f"#{name}", name=name, columns=[column])
                tableDef.indexes.append(index)

        # generate schema for all tables, must be called last
        self._apdb_tables = self._make_apdb_tables()
        self._extra_tables = self._make_extra_tables(self._apdb_tables)

        self._has_insert_id: bool | None = None
        self._metadata_check: bool | None = None
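
    # A construction sketch (hedged: the engine URL, schema file name, and
    # index mode below are illustrative placeholders, not values shipped
    # with this module):
    #
    #     engine = sqlalchemy.create_engine("sqlite:///apdb.sqlite3")
    #     schema = ApdbSqlSchema(
    #         engine,
    #         dia_object_index="baseline",
    #         htm_index_column="pixelId",
    #         schema_file="apdb-schema.yaml",
    #         use_insert_id=True,
    #     )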

    def empty(self) -> bool:
        """Return True if database schema is empty.

        Returns
        -------
        empty : `bool`
            `True` if none of the required APDB tables exist in the database,
            `False` if all required tables exist.

        Raises
        ------
        InconsistentSchemaError
            Raised when some of the required tables exist but not all.
        """
        inspector = inspect(self._engine)
        table_names = set(inspector.get_table_names(self._metadata.schema))

        existing_tables = []
        missing_tables = []
        for table_enum in self._apdb_tables:
            table_name = table_enum.table_name(self._prefix)
            if table_name in table_names:
                existing_tables.append(table_name)
            else:
                missing_tables.append(table_name)

        if not missing_tables:
            return False
        elif not existing_tables:
            return True
        else:
            raise InconsistentSchemaError(
                f"Only some required APDB tables exist: {existing_tables}, missing tables: {missing_tables}"
            )

    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If `True` then drop tables before creating new ones.
        """
        # Create the namespace if it does not exist yet; for now this only
        # makes sense for PostgreSQL.
        if self._metadata.schema:
            dialect = self._engine.dialect
            quoted_schema = dialect.preparer(dialect).quote_schema(self._metadata.schema)
            create_schema = DDL(
                "CREATE SCHEMA IF NOT EXISTS %(schema)s", context={"schema": quoted_schema}
            ).execute_if(dialect="postgresql")
            event.listen(self._metadata, "before_create", create_schema)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info("dropping all tables")
            self._metadata.drop_all(self._engine)
        _LOG.info("creating all tables")
        self._metadata.create_all(self._engine)

        # Reset possibly cached value.
        self._has_insert_id = None
        self._metadata_check = None
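
    # Typical interplay of empty() and makeSchema(), sketched as comments
    # (the `schema` instance is assumed to exist, as in the construction
    # sketch above):
    #
    #     if schema.empty():
    #         # No APDB tables yet; create the full schema.
    #         schema.makeSchema()
    #     # An InconsistentSchemaError from empty() would instead signal a
    #     # partially created schema that needs manual attention.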

    def get_table(self, table_enum: ApdbTables | ExtraTables) -> Table:
        """Return SQLAlchemy table instance for a specified table type/enum.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table to return.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            Table instance.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        try:
            if isinstance(table_enum, ApdbTables):
                if table_enum is ApdbTables.metadata:
                    # There may be cases when schema is configured with the
                    # metadata table but database is still missing it. Check
                    # that table actually exists in the database. Note that
                    # this may interact with `makeSchema`.
                    if self._metadata_check is None:
                        inspector = inspect(self._engine)
                        table_name = table_enum.table_name(self._prefix)
                        self._metadata_check = inspector.has_table(table_name, schema=self._metadata.schema)
                    if not self._metadata_check:
                        # this will be caught below
                        raise LookupError("metadata table is missing")
                return self._apdb_tables[table_enum]
            else:
                return self._extra_tables[table_enum]
        except LookupError:
            raise ValueError(f"Table type {table_enum} does not exist in the schema") from None

    def get_apdb_columns(self, table_enum: ApdbTables | ExtraTables) -> list[Column]:
        """Return list of columns defined for a table in APDB schema.

        Returned list excludes columns that are implementation-specific,
        e.g. the ``pixelId`` column is not included in the returned list.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table.

        Returns
        -------
        columns : `list` [`sqlalchemy.schema.Column`]
            List of columns.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        table = self.get_table(table_enum)
        exclude_columns = set()
        if table_enum in self.pixel_id_tables:
            exclude_columns.add(self._htm_index_column)
        return [column for column in table.columns if column.name not in exclude_columns]
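
    # For example (hypothetical, assuming htm_index_column="pixelId"):
    #
    #     columns = schema.get_apdb_columns(ApdbTables.DiaSource)
    #     assert "pixelId" not in {column.name for column in columns}
    #
    # while get_table(ApdbTables.DiaSource).columns would still include it.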

    @property
    def has_insert_id(self) -> bool:
        """Whether insert ID tables are to be used (`bool`)."""
        if self._has_insert_id is None:
            self._has_insert_id = self._use_insert_id and self._check_insert_id()
        return self._has_insert_id

    def _check_insert_id(self) -> bool:
        """Check whether database has tables for tracking insert IDs."""
        inspector = inspect(self._engine)
        db_tables = set(inspector.get_table_names(schema=self._metadata.schema))
        return ExtraTables.DiaInsertId.table_name(self._prefix) in db_tables

    def _make_apdb_tables(self, mysql_engine: str = "InnoDB") -> Mapping[ApdbTables, Table]:
        """Generate schema for regular tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        """
        tables = {}
        for table_enum in ApdbTables:
            if table_enum is ApdbTables.DiaObjectLast and self._dia_object_index != "last_object_table":
                continue
            if table_enum is ApdbTables.metadata and table_enum not in self.tableSchemas:
                # Schema does not define metadata.
                continue

            columns = self._tableColumns(table_enum)
            constraints = self._tableIndices(table_enum)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _make_extra_tables(
        self, apdb_tables: Mapping[ApdbTables, Table], mysql_engine: str = "InnoDB"
    ) -> Mapping[ExtraTables, Table]:
        """Generate schema for insert ID tables."""
        tables: dict[ExtraTables, Table] = {}
        if not self._use_insert_id:
            return tables

        # Parent table needs to be defined first
        column_defs = [
            Column("insert_id", GUID, primary_key=True),
            Column("insert_time", sqlalchemy.types.TIMESTAMP, nullable=False),
        ]
        parent_table = Table(
            ExtraTables.DiaInsertId.table_name(self._prefix),
            self._metadata,
            *column_defs,
            mysql_engine=mysql_engine,
        )
        tables[ExtraTables.DiaInsertId] = parent_table

        for table_enum, apdb_enum in ExtraTables.insert_id_tables().items():
            apdb_table = apdb_tables[apdb_enum]
            columns = self._insertIdColumns(table_enum)
            constraints = self._insertIdIndices(table_enum, apdb_table, parent_table)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _tableColumns(self, table_name: ApdbTables) -> list[Column]:
        """Return list of columns in a table.

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """
        # Get the list of columns in the primary key; they are treated
        # somewhat specially below.
        table_schema = self.tableSchemas[table_name]

        # convert all column definitions into SQLAlchemy Columns
        column_defs: list[Column] = []
        for column in table_schema.columns:
            kwargs: dict[str, Any] = dict(nullable=column.nullable)
            if column.value is not None:
                kwargs.update(server_default=str(column.value))
            if column in table_schema.primary_key:
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _tableIndices(self, table_name: ApdbTables) -> list[sqlalchemy.schema.SchemaItem]:
        """Return list of constraints/indices in a table.

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """
        table_schema = self.tableSchemas[table_name]

        # convert all index definitions into SQLAlchemy objects
        index_defs: list[sqlalchemy.schema.SchemaItem] = []
        if table_schema.primary_key:
            index_defs.append(PrimaryKeyConstraint(*[column.name for column in table_schema.primary_key]))
        for index in table_schema.indexes:
            name = self._prefix + index.name if index.name else ""
            index_defs.append(Index(name, *[column.name for column in index.columns]))
        for constraint in table_schema.constraints:
            constr_name: str | None = None
            if constraint.name:
                constr_name = self._prefix + constraint.name
            if isinstance(constraint, simple.UniqueConstraint):
                index_defs.append(
                    UniqueConstraint(*[column.name for column in constraint.columns], name=constr_name)
                )

        return index_defs

    def _insertIdColumns(self, table_enum: ExtraTables) -> list[Column]:
        """Return list of columns for insert ID tables."""
        column_defs: list[Column] = [Column("insert_id", GUID, nullable=False)]
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            column_defs += self._tablePkColumns(insert_id_tables[table_enum])
        else:
            assert False, "Above branches have to cover all enum values"
        return column_defs

    def _tablePkColumns(self, table_enum: ApdbTables) -> list[Column]:
        """Return a list of columns for table PK."""
        table_schema = self.tableSchemas[table_enum]
        column_defs: list[Column] = []
        for column in table_schema.primary_key:
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, nullable=False, autoincrement=False))
        return column_defs

    def _insertIdIndices(
        self,
        table_enum: ExtraTables,
        apdb_table: sqlalchemy.schema.Table,
        parent_table: sqlalchemy.schema.Table,
    ) -> list[sqlalchemy.schema.SchemaItem]:
        """Return set of constraints/indices for insert ID tables."""
        index_defs: list[sqlalchemy.schema.SchemaItem] = []

        # Special case for insert ID tables that are not in felis schema.
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            # PK is the same as for original table
            pk_names = [column.name for column in self._tablePkColumns(insert_id_tables[table_enum])]
            index_defs.append(PrimaryKeyConstraint(*pk_names))
            # Non-unique index on insert_id column.
            name = self._prefix + table_enum.name + "_idx"
            index_defs.append(Index(name, "insert_id"))
            # Foreign key to original table
            pk_columns = [apdb_table.columns[column] for column in pk_names]
            index_defs.append(
                ForeignKeyConstraint(pk_names, pk_columns, onupdate="CASCADE", ondelete="CASCADE")
            )
            # Foreign key to parent table
            index_defs.append(
                ForeignKeyConstraint(
                    ["insert_id"], [parent_table.columns["insert_id"]], onupdate="CASCADE", ondelete="CASCADE"
                )
            )
        else:
            assert False, "Above branches have to cover all enum values"
        return index_defs
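
    # Schematically, for DiaSourceInsertId with an empty prefix this yields
    # DDL along these lines (illustrative only; the actual PK columns come
    # from the felis schema, and "diaSourceId" here is an assumed example):
    #
    #     CREATE TABLE "DiaSourceInsertId" (
    #         insert_id CHAR(32) NOT NULL,
    #         "diaSourceId" BIGINT NOT NULL,
    #         PRIMARY KEY ("diaSourceId"),
    #         FOREIGN KEY ("diaSourceId") REFERENCES "DiaSource" ("diaSourceId")
    #             ON UPDATE CASCADE ON DELETE CASCADE,
    #         FOREIGN KEY (insert_id) REFERENCES "DiaInsertId" (insert_id)
    #             ON UPDATE CASCADE ON DELETE CASCADE
    #     )
    #
    # plus a non-unique index "DiaSourceInsertId_idx" on (insert_id).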

    @classmethod
    def _getDoubleType(cls, engine: sqlalchemy.engine.Engine) -> type | sqlalchemy.types.TypeEngine:
        """DOUBLE type is database-specific; select one based on dialect.

        Parameters
        ----------
        engine : `sqlalchemy.engine.Engine`
            Database engine.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.
        """
        if engine.name == "mysql":
            from sqlalchemy.dialects.mysql import DOUBLE

            return DOUBLE(asdecimal=False)
        elif engine.name == "postgresql":
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "oracle":
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "sqlite":
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL

            return REAL
        else:
            raise TypeError("cannot determine DOUBLE type, unexpected dialect: " + engine.name)