Coverage for python/lsst/dax/apdb/apdbSqlSchema.py: 18% of 209 statements
coverage.py v6.5.0, created at 2023-02-02 06:34 -0800
# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module responsible for APDB schema operations."""

from __future__ import annotations

__all__ = ["ApdbSqlSchema", "ExtraTables"]

import enum
import logging
import uuid
from typing import Any, Dict, List, Mapping, Optional, Type

import felis.types
import sqlalchemy
from felis import simple
from sqlalchemy import (
    DDL,
    Column,
    ForeignKeyConstraint,
    Index,
    MetaData,
    PrimaryKeyConstraint,
    Table,
    UniqueConstraint,
    event,
    inspect,
)
from sqlalchemy.dialects.postgresql import UUID

from .apdbSchema import ApdbSchema, ApdbTables

_LOG = logging.getLogger(__name__)
56#
57# Copied from daf_butler.
58#
59class GUID(sqlalchemy.TypeDecorator):
60 """Platform-independent GUID type.
62 Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
63 stringified hex values.
64 """
66 impl = sqlalchemy.CHAR
68 cache_ok = True
70 def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
71 if dialect.name == "postgresql":
72 return dialect.type_descriptor(UUID())
73 else:
74 return dialect.type_descriptor(sqlalchemy.CHAR(32))
76 def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
77 if value is None:
78 return value
80 # Coerce input to UUID type, in general having UUID on input is the
81 # only thing that we want but there is code right now that uses ints.
82 if isinstance(value, int):
83 value = uuid.UUID(int=value)
84 elif isinstance(value, bytes):
85 value = uuid.UUID(bytes=value)
86 elif isinstance(value, str):
87 # hexstring
88 value = uuid.UUID(hex=value)
89 elif not isinstance(value, uuid.UUID):
90 raise TypeError(f"Unexpected type of a bind value: {type(value)}")
92 if dialect.name == "postgresql":
93 return str(value)
94 else:
95 return "%.32x" % value.int
97 def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
98 if value is None:
99 return value
100 else:
101 return uuid.UUID(hex=value)
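
# A minimal usage sketch for GUID (hypothetical, not part of this module):
# any of the accepted input types (``uuid.UUID``, ``int``, ``bytes``, hex
# ``str``) binds to the same stored form, and results come back as
# ``uuid.UUID``. With a non-PostgreSQL dialect the value is stored as a
# 32-character hex string in a CHAR(32) column:
#
#     engine = sqlalchemy.create_engine("sqlite://")
#     demo = Table("demo", MetaData(), Column("id", GUID, primary_key=True))
#     demo.create(engine)
#     with engine.begin() as connection:
#         connection.execute(demo.insert().values(id=uuid.uuid4()))
#         stored = connection.execute(sqlalchemy.select(demo.c.id)).scalar()
#         assert isinstance(stored, uuid.UUID)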

@enum.unique
class ExtraTables(enum.Enum):
    """Names of the tables used for tracking insert IDs."""

    DiaInsertId = "DiaInsertId"
    """Name of the table for insert ID records."""

    DiaObjectInsertId = "DiaObjectInsertId"
    """Name of the table for DIAObject insert ID records."""

    DiaSourceInsertId = "DiaSourceInsertId"
    """Name of the table for DIASource insert ID records."""

    DiaForcedSourceInsertId = "DiaFSourceInsertId"
    """Name of the table for DIAForcedSource insert ID records."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name."""
        return prefix + self.value

    @classmethod
    def insert_id_tables(cls) -> Mapping[ExtraTables, ApdbTables]:
        """Return mapping of tables used for insert ID tracking to their
        corresponding regular tables.
        """
        return {
            cls.DiaObjectInsertId: ApdbTables.DiaObject,
            cls.DiaSourceInsertId: ApdbTables.DiaSource,
            cls.DiaForcedSourceInsertId: ApdbTables.DiaForcedSource,
        }
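
# A short illustration (hedged): ``table_name`` simply prepends the configured
# prefix to the enum value, and ``insert_id_tables`` maps each per-table
# tracking table to its regular APDB table. Note that the DIAForcedSource
# tracking table is deliberately named ``DiaFSourceInsertId``:
#
#     assert ExtraTables.DiaForcedSourceInsertId.table_name("pp_") == "pp_DiaFSourceInsertId"
#     assert ExtraTables.insert_id_tables()[ExtraTables.DiaSourceInsertId] is ApdbTables.DiaSource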

class ApdbSqlSchema(ApdbSchema):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance.
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None.
    sources : `sqlalchemy.Table`
        DiaSource table instance.
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance.
    has_insert_id : `bool`
        If `True` then schema has tables for insert ID tracking.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance.
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbSqlConfig.dia_object_index`
        for details.
    htm_index_column : `str`
        Name of an HTM index column for DiaObject and DiaSource tables.
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    prefix : `str`, optional
        Prefix to add to all schema elements.
    namespace : `str`, optional
        Namespace (or schema name) to use for all APDB tables.
    use_insert_id : `bool`, optional
        If `True` then make additional tables used for insert ID tracking.
    """

    pixel_id_tables = (ApdbTables.DiaObject, ApdbTables.DiaObjectLast, ApdbTables.DiaSource)
    """Tables that need pixelId column for spatial indexing."""

    def __init__(
        self,
        engine: sqlalchemy.engine.Engine,
        dia_object_index: str,
        htm_index_column: str,
        schema_file: str,
        schema_name: str = "ApdbSchema",
        prefix: str = "",
        namespace: Optional[str] = None,
        use_insert_id: bool = False,
    ):
        super().__init__(schema_file, schema_name)

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._htm_index_column = htm_index_column
        self._prefix = prefix
        self._use_insert_id = use_insert_id

        self._metadata = MetaData(schema=namespace)

        # map YAML column types to SQLAlchemy
        self._type_map = {
            felis.types.Double: self._getDoubleType(engine),
            felis.types.Float: sqlalchemy.types.Float,
            felis.types.Timestamp: sqlalchemy.types.TIMESTAMP,
            felis.types.Long: sqlalchemy.types.BigInteger,
            felis.types.Int: sqlalchemy.types.Integer,
            felis.types.Short: sqlalchemy.types.Integer,
            felis.types.Byte: sqlalchemy.types.Integer,
            felis.types.Binary: sqlalchemy.types.LargeBinary,
            felis.types.Text: sqlalchemy.types.CHAR,
            felis.types.String: sqlalchemy.types.CHAR,
            felis.types.Char: sqlalchemy.types.CHAR,
            felis.types.Unicode: sqlalchemy.types.CHAR,
            felis.types.Boolean: sqlalchemy.types.Boolean,
        }

        # Add pixelId column and index to tables that need it.
        for table in self.pixel_id_tables:
            tableDef = self.tableSchemas.get(table)
            if not tableDef:
                continue
            column = simple.Column(
                id=f"#{htm_index_column}",
                name=htm_index_column,
                datatype=felis.types.Long,
                nullable=False,
                value=None,
                description="Pixelization index column.",
                table=tableDef,
            )
            tableDef.columns.append(column)

            # Adjust index if needed.
            if table == ApdbTables.DiaObject and self._dia_object_index == "pix_id_iov":
                tableDef.primary_key.insert(0, column)

            if table is ApdbTables.DiaObjectLast:
                # use it as a leading PK column
                tableDef.primary_key.insert(0, column)
            else:
                # make a regular index
                name = f"IDX_{tableDef.name}_{htm_index_column}"
                index = simple.Index(id=f"#{name}", name=name, columns=[column])
                tableDef.indexes.append(index)

        # Generate schema for all tables; this must be called last.
        self._apdb_tables = self._make_apdb_tables()
        self._extra_tables = self._make_extra_tables(self._apdb_tables)

        self._has_insert_id: bool | None = None
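
    # A hedged construction sketch (connection string and schema file name are
    # hypothetical); ``dia_object_index`` takes the same values as
    # ``ApdbSqlConfig.dia_object_index``:
    #
    #     engine = sqlalchemy.create_engine("postgresql://user@host/apdb")
    #     schema = ApdbSqlSchema(
    #         engine,
    #         dia_object_index="baseline",
    #         htm_index_column="pixelId",
    #         schema_file="apdb-schema.yaml",
    #         namespace="apdb_schema",
    #     )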

    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If `True` then drop tables before creating new ones.
        """
        # Create namespace if it does not exist yet; for now this only makes
        # sense for postgres.
        if self._metadata.schema:
            dialect = self._engine.dialect
            quoted_schema = dialect.preparer(dialect).quote_schema(self._metadata.schema)
            create_schema = DDL(
                "CREATE SCHEMA IF NOT EXISTS %(schema)s", context={"schema": quoted_schema}
            ).execute_if(dialect="postgresql")
            event.listen(self._metadata, "before_create", create_schema)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info("dropping all tables")
            self._metadata.drop_all(self._engine)
        _LOG.info("creating all tables")
        self._metadata.create_all(self._engine)

        # Reset possibly cached value.
        self._has_insert_id = None
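
    # Typical usage (sketch, continuing the construction example above):
    # re-create the tables from scratch, then check whether insert ID
    # tracking ended up in the database:
    #
    #     schema.makeSchema(drop=True)
    #     if schema.has_insert_id:
    #         ...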

    def get_table(self, table_enum: ApdbTables | ExtraTables) -> Table:
        """Return SQLAlchemy table instance for a specified table type/enum.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table to return.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            Table instance.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        try:
            if isinstance(table_enum, ApdbTables):
                return self._apdb_tables[table_enum]
            else:
                return self._extra_tables[table_enum]
        except LookupError:
            raise ValueError(f"Table type {table_enum} does not exist in the schema") from None
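
    # For example (sketch): both enum types are accepted, and an unknown
    # table raises ``ValueError`` rather than ``KeyError``:
    #
    #     objects = schema.get_table(ApdbTables.DiaObject)
    #     inserts = schema.get_table(ExtraTables.DiaInsertId)  # ValueError if
    #                                                          # insert IDs are disabled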

    def get_apdb_columns(self, table_enum: ApdbTables | ExtraTables) -> list[Column]:
        """Return list of columns defined for a table in APDB schema.

        The returned list excludes columns that are implementation-specific,
        e.g. the ``pixelId`` column is not included in the returned list.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table.

        Returns
        -------
        columns : `list` [`sqlalchemy.schema.Column`]
            List of table columns.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        table = self.get_table(table_enum)
        exclude_columns = set()
        if table_enum in self.pixel_id_tables:
            exclude_columns.add(self._htm_index_column)
        return [column for column in table.columns if column.name not in exclude_columns]
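
    # For instance (sketch, assuming the conventional ``pixelId`` index
    # column name): the column exists in the SQLAlchemy table but is filtered
    # out here:
    #
    #     names = {column.name for column in schema.get_apdb_columns(ApdbTables.DiaSource)}
    #     assert "pixelId" not in names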

    @property
    def has_insert_id(self) -> bool:
        """Whether insert ID tables are to be used (`bool`)."""
        if self._has_insert_id is None:
            self._has_insert_id = self._use_insert_id and self._check_insert_id()
        return self._has_insert_id

    def _check_insert_id(self) -> bool:
        """Check whether database has tables for tracking insert IDs."""
        inspector = inspect(self._engine)
        db_tables = set(inspector.get_table_names(schema=self._metadata.schema))
        return ExtraTables.DiaInsertId.table_name(self._prefix) in db_tables
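
    # The check above is a plain table-name lookup; an equivalent standalone
    # sketch using SQLAlchemy inspection (names are hypothetical):
    #
    #     inspector = sqlalchemy.inspect(engine)
    #     exists = "DiaInsertId" in set(inspector.get_table_names(schema="apdb_schema"))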

    def _make_apdb_tables(self, mysql_engine: str = "InnoDB") -> Mapping[ApdbTables, Table]:
        """Generate schema for regular tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        """
        tables = {}
        for table_enum in ApdbTables:
            if table_enum is ApdbTables.DiaObjectLast and self._dia_object_index != "last_object_table":
                continue

            columns = self._tableColumns(table_enum)
            constraints = self._tableIndices(table_enum)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _make_extra_tables(
        self, apdb_tables: Mapping[ApdbTables, Table], mysql_engine: str = "InnoDB"
    ) -> Mapping[ExtraTables, Table]:
        """Generate schema for insert ID tables."""
        tables: dict[ExtraTables, Table] = {}
        if not self._use_insert_id:
            return tables

        # Parent table needs to be defined first.
        column_defs = [
            Column("insert_id", GUID, primary_key=True),
            Column("insert_time", sqlalchemy.types.TIMESTAMP, nullable=False),
        ]
        parent_table = Table(
            ExtraTables.DiaInsertId.table_name(self._prefix),
            self._metadata,
            *column_defs,
            mysql_engine=mysql_engine,
        )
        tables[ExtraTables.DiaInsertId] = parent_table

        for table_enum, apdb_enum in ExtraTables.insert_id_tables().items():
            apdb_table = apdb_tables[apdb_enum]
            columns = self._insertIdColumns(table_enum)
            constraints = self._insertIdIndices(table_enum, apdb_table, parent_table)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _tableColumns(self, table_name: ApdbTables) -> List[Column]:
        """Return list of columns in a table.

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """
        # Get the list of columns in primary key; they are treated somewhat
        # specially below.
        table_schema = self.tableSchemas[table_name]

        # convert all column dicts into alchemy Columns
        column_defs = []
        for column in table_schema.columns:
            kwargs: Dict[str, Any] = dict(nullable=column.nullable)
            if column.value is not None:
                kwargs.update(server_default=str(column.value))
            if column in table_schema.primary_key:
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _tableIndices(self, table_name: ApdbTables) -> List[sqlalchemy.schema.Constraint]:
        """Return set of constraints/indices in a table.

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """
        table_schema = self.tableSchemas[table_name]

        # Convert all index definitions into SQLAlchemy constraints/indices.
        index_defs: List[sqlalchemy.schema.Constraint] = []
        if table_schema.primary_key:
            index_defs.append(PrimaryKeyConstraint(*[column.name for column in table_schema.primary_key]))
        for index in table_schema.indexes:
            name = self._prefix + index.name if index.name else ""
            index_defs.append(Index(name, *[column.name for column in index.columns]))
        for constraint in table_schema.constraints:
            kwargs = {}
            if constraint.name:
                kwargs["name"] = self._prefix + constraint.name
            if isinstance(constraint, simple.UniqueConstraint):
                index_defs.append(UniqueConstraint(*[column.name for column in constraint.columns], **kwargs))

        return index_defs

    def _insertIdColumns(self, table_enum: ExtraTables) -> List[Column]:
        """Return list of columns for insert ID tables."""
        column_defs: list[Column] = [Column("insert_id", GUID, nullable=False)]
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            column_defs += self._tablePkColumns(insert_id_tables[table_enum])
        else:
            assert False, "Above branches have to cover all enum values"
        return column_defs

    def _tablePkColumns(self, table_enum: ApdbTables) -> list[Column]:
        """Return a list of columns for table PK."""
        table_schema = self.tableSchemas[table_enum]
        column_defs = []
        for column in table_schema.primary_key:
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, nullable=False, autoincrement=False))
        return column_defs

    def _insertIdIndices(
        self,
        table_enum: ExtraTables,
        apdb_table: sqlalchemy.schema.Table,
        parent_table: sqlalchemy.schema.Table,
    ) -> List[sqlalchemy.schema.Constraint]:
        """Return set of constraints/indices for insert ID tables."""
        index_defs: List[sqlalchemy.schema.Constraint] = []

        # Special case for insert ID tables that are not in felis schema.
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            # PK is the same as for the original table.
            pk_names = [column.name for column in self._tablePkColumns(insert_id_tables[table_enum])]
            index_defs.append(PrimaryKeyConstraint(*pk_names))
            # Non-unique index on insert_id column.
            name = self._prefix + table_enum.name + "_idx"
            index_defs.append(Index(name, "insert_id"))
            # Foreign key to the original table.
            pk_columns = [apdb_table.columns[column] for column in pk_names]
            index_defs.append(
                ForeignKeyConstraint(pk_names, pk_columns, onupdate="CASCADE", ondelete="CASCADE")
            )
            # Foreign key to the parent table.
            index_defs.append(
                ForeignKeyConstraint(
                    ["insert_id"], [parent_table.columns["insert_id"]], onupdate="CASCADE", ondelete="CASCADE"
                )
            )
        else:
            assert False, "Above branches have to cover all enum values"
        return index_defs

    @classmethod
    def _getDoubleType(cls, engine: sqlalchemy.engine.Engine) -> Type:
        """DOUBLE type is database-specific, select one based on dialect.

        Parameters
        ----------
        engine : `sqlalchemy.engine.Engine`
            Database engine.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.
        """
        if engine.name == "mysql":
            from sqlalchemy.dialects.mysql import DOUBLE

            return DOUBLE(asdecimal=False)
        elif engine.name == "postgresql":
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "oracle":
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "sqlite":
            # All floats in sqlite are 8-byte.
            from sqlalchemy.dialects.sqlite import REAL

            return REAL
        else:
            raise TypeError("cannot determine DOUBLE type, unexpected dialect: " + engine.name)
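
# A quick demonstration of the dialect dispatch above (sketch): for an
# in-memory SQLite engine the selected type is SQLite's 8-byte REAL:
#
#     engine = sqlalchemy.create_engine("sqlite://")
#     double_type = ApdbSqlSchema._getDoubleType(engine)  # sqlite REAL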