Coverage for python/lsst/dax/apdb/apdbSchema.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module responsible for APDB schema operations.
23"""
25__all__ = ["ColumnDef", "IndexDef", "TableDef",
26 "make_minimal_dia_object_schema", "make_minimal_dia_source_schema",
27 "ApdbSchema"]
29from collections import namedtuple
30import logging
31import os
32import yaml
34import sqlalchemy
35from sqlalchemy import (Column, Index, MetaData, PrimaryKeyConstraint,
36 UniqueConstraint, Table)
37from sqlalchemy.schema import CreateTable, CreateIndex
38from sqlalchemy.ext.compiler import compiles
39import lsst.afw.table as afwTable
_LOG = logging.getLogger(__name__.partition(".")[2])  # strip leading "lsst."


# Named tuples describing the schema read from the YAML files.

# Column description:
#   name : column name
#   type : name of cat type (INT, FLOAT, etc.)
#   nullable : True or False
#   default : default value for column, can be None
#   description : documentation, can be None or empty
#   unit : string with unit name, can be None
#   ucd : string with ucd, can be None
ColumnDef = namedtuple('ColumnDef', 'name type nullable default description unit ucd')

# Index description:
#   name : index name, can be None or empty
#   type : one of "PRIMARY", "UNIQUE", "INDEX"
#   columns : list of column names in index
IndexDef = namedtuple('IndexDef', 'name type columns')

# Table description:
#   name : table name
#   description : documentation, can be None or empty
#   columns : list of ColumnDef instances
#   indices : list of IndexDef instances, can be empty or None
TableDef = namedtuple('TableDef', 'name description columns indices')
def make_minimal_dia_object_schema():
    """Define and create the minimal schema required for a DIAObject.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Minimal schema for DIAObjects.
    """
    minimal_schema = afwTable.SourceTable.makeMinimalSchema()
    # extend the afw minimal source schema with the two APDB-specific fields
    minimal_schema.addField(
        "pixelId", type='L',
        doc='Unique spherical pixelization identifier.')
    minimal_schema.addField("nDiaSources", type='L')
    return minimal_schema
def make_minimal_dia_source_schema():
    """Define and create the minimal schema required for a DIASource.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Minimal schema for DIASources.
    """
    schema = afwTable.SourceTable.makeMinimalSchema()
    # (name, afw field type code, documentation) for each extra field;
    # order matters, fields are added to the schema in this sequence
    extra_fields = (
        ("diaObjectId", 'L',
         'Unique identifier of the DIAObject this source is associated to.'),
        ("ccdVisitId", 'L',
         'Id of the exposure and ccd this object was detected in.'),
        ("psFlux", 'D',
         'Calibrated PSF flux of this source.'),
        ("psFluxErr", 'D',
         'Calibrated PSF flux err of this source.'),
        ("flags", 'L',
         'Quality flags for this DIASource.'),
        ("pixelId", 'L',
         'Unique spherical pixelization identifier.'),
    )
    for name, field_type, doc in extra_fields:
        schema.addField(name, type=field_type, doc=doc)
    return schema
@compiles(CreateTable, "oracle")
def _add_suffixes_tbl(element, compiler, **kw):
    """Add all needed suffixes for Oracle CREATE TABLE statement.

    This is a special compilation method for CreateTable clause which
    registers itself with SQLAlchemy using @compiles decorator. Exact method
    name does not matter. Client can pass a dict to ``info`` keyword argument
    of Table constructor. If the dict has a key "oracle_tablespace" then its
    value is used as tablespace name. If the dict has a key "oracle_iot" with
    true value then IOT table is created. This method generates additional
    clauses for CREATE TABLE statement which specify tablespace name and
    "ORGANIZATION INDEX" for IOT.

    .. seealso:: https://docs.sqlalchemy.org/en/latest/core/compiler.html
    """
    text = compiler.visit_create_table(element, **kw)
    _LOG.debug("text: %r", text)
    # element.element is the Table being created; its info dict carries
    # the Oracle-specific options
    table_info = element.element.info
    tablespace = table_info.get("oracle_tablespace")
    _LOG.debug("oracle_tablespace: %r", tablespace)
    if table_info.get("oracle_iot", False):
        text += " ORGANIZATION INDEX"
    if tablespace:
        text += " TABLESPACE " + tablespace
    _LOG.debug("text: %r", text)
    return text
@compiles(CreateIndex, "oracle")
def _add_suffixes_idx(element, compiler, **kw):
    """Add all needed suffixes for Oracle CREATE INDEX statement.

    This is a special compilation method for CreateIndex clause which
    registers itself with SQLAlchemy using @compiles decorator. Exact method
    name does not matter. Client can pass a dict to ``info`` keyword argument
    of Index constructor. If the dict has a key "oracle_tablespace" then its
    value is used as tablespace name. This method generates additional
    clause for CREATE INDEX statement which specifies tablespace name.

    .. seealso:: https://docs.sqlalchemy.org/en/latest/core/compiler.html
    """
    text = compiler.visit_create_index(element, **kw)
    _LOG.debug("text: %r", text)
    # element.element is the Index being created
    tablespace = element.element.info.get("oracle_tablespace")
    _LOG.debug("oracle_tablespace: %r", tablespace)
    if tablespace:
        text += " TABLESPACE " + tablespace
    _LOG.debug("text: %r", text)
    return text
class ApdbSchema(object):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_nightly : `sqlalchemy.Table`
        DiaObjectNightly table instance, may be None
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    visits : `sqlalchemy.Table`
        ApdbProtoVisits table instance

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbConfig.dia_object_index`
        for details.
    dia_object_nightly : `bool`
        If `True` then create per-night DiaObject table as well.
    schema_file : `str`
        Name of the YAML schema file.
    extra_schema_file : `str`, optional
        Name of the YAML schema file with extra column definitions.
    column_map : `str`, optional
        Name of the YAML file with column mappings.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard APDB
        schema (only if standard schema does not have matching column).
    prefix : `str`, optional
        Prefix to add to all schema elements.
    """

    # map afw type names into cat type names
    _afw_type_map = {"I": "INT",
                     "L": "BIGINT",
                     "F": "FLOAT",
                     "D": "DOUBLE",
                     "Angle": "DOUBLE",
                     "String": "CHAR",
                     "Flag": "BOOL"}
    # reverse mapping, cat type name to afw field type code
    _afw_type_map_reverse = {"INT": "I",
                             "BIGINT": "L",
                             "FLOAT": "F",
                             "DOUBLE": "D",
                             "DATETIME": "L",
                             "CHAR": "String",
                             "BOOL": "Flag"}
219 def __init__(self, engine, dia_object_index, dia_object_nightly,
220 schema_file, extra_schema_file=None, column_map=None,
221 afw_schemas=None, prefix=""):
223 self._engine = engine
224 self._dia_object_index = dia_object_index
225 self._dia_object_nightly = dia_object_nightly
226 self._prefix = prefix
228 self._metadata = MetaData(self._engine)
230 self.objects = None
231 self.objects_nightly = None
232 self.objects_last = None
233 self.sources = None
234 self.forcedSources = None
235 self.visits = None
237 if column_map:
238 column_map = os.path.expandvars(column_map)
239 _LOG.debug("Reading column map file %s", column_map)
240 with open(column_map) as yaml_stream:
241 # maps cat column name to afw column name
242 self._column_map = yaml.load(yaml_stream, Loader=yaml.SafeLoader)
243 _LOG.debug("column map: %s", self._column_map)
244 else:
245 _LOG.debug("No column map file is given, initialize to empty")
246 self._column_map = {}
247 self._column_map_reverse = {}
248 for table, cmap in self._column_map.items():
249 # maps afw column name to cat column name
250 self._column_map_reverse[table] = {v: k for k, v in cmap.items()}
251 _LOG.debug("reverse column map: %s", self._column_map_reverse)
253 # build complete table schema
254 self._schemas = self._buildSchemas(schema_file, extra_schema_file,
255 afw_schemas)
257 # map cat column types to alchemy
258 self._type_map = dict(DOUBLE=self._getDoubleType(),
259 FLOAT=sqlalchemy.types.Float,
260 DATETIME=sqlalchemy.types.TIMESTAMP,
261 BIGINT=sqlalchemy.types.BigInteger,
262 INTEGER=sqlalchemy.types.Integer,
263 INT=sqlalchemy.types.Integer,
264 TINYINT=sqlalchemy.types.Integer,
265 BLOB=sqlalchemy.types.LargeBinary,
266 CHAR=sqlalchemy.types.CHAR,
267 BOOL=sqlalchemy.types.Boolean)
269 # generate schema for all tables, must be called last
270 self._makeTables()
272 def _makeTables(self, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
273 """Generate schema for all tables.
275 Parameters
276 ----------
277 mysql_engine : `str`, optional
278 MySQL engine type to use for new tables.
279 oracle_tablespace : `str`, optional
280 Name of Oracle tablespace, only useful with oracle
281 oracle_iot : `bool`, optional
282 Make Index-organized DiaObjectLast table.
283 """
285 info = dict(oracle_tablespace=oracle_tablespace)
287 if self._dia_object_index == 'pix_id_iov':
288 # Special PK with HTM column in first position
289 constraints = self._tableIndices('DiaObjectIndexHtmFirst', info)
290 else:
291 constraints = self._tableIndices('DiaObject', info)
292 table = Table(self._prefix+'DiaObject', self._metadata,
293 *(self._tableColumns('DiaObject') + constraints),
294 mysql_engine=mysql_engine,
295 info=info)
296 self.objects = table
298 if self._dia_object_nightly:
299 # Same as DiaObject but no index
300 table = Table(self._prefix+'DiaObjectNightly', self._metadata,
301 *self._tableColumns('DiaObject'),
302 mysql_engine=mysql_engine,
303 info=info)
304 self.objects_nightly = table
306 if self._dia_object_index == 'last_object_table':
307 # Same as DiaObject but with special index
308 info2 = info.copy()
309 info2.update(oracle_iot=oracle_iot)
310 table = Table(self._prefix+'DiaObjectLast', self._metadata,
311 *(self._tableColumns('DiaObjectLast')
312 + self._tableIndices('DiaObjectLast', info)),
313 mysql_engine=mysql_engine,
314 info=info2)
315 self.objects_last = table
317 # for all other tables use index definitions in schema
318 for table_name in ('DiaSource', 'SSObject', 'DiaForcedSource', 'DiaObject_To_Object_Match'):
319 table = Table(self._prefix+table_name, self._metadata,
320 *(self._tableColumns(table_name)
321 + self._tableIndices(table_name, info)),
322 mysql_engine=mysql_engine,
323 info=info)
324 if table_name == 'DiaSource':
325 self.sources = table
326 elif table_name == 'DiaForcedSource':
327 self.forcedSources = table
329 # special table to track visits, only used by prototype
330 table = Table(self._prefix+'ApdbProtoVisits', self._metadata,
331 Column('visitId', sqlalchemy.types.BigInteger, nullable=False),
332 Column('visitTime', sqlalchemy.types.TIMESTAMP, nullable=False),
333 PrimaryKeyConstraint('visitId', name=self._prefix+'PK_ApdbProtoVisits'),
334 Index(self._prefix+'IDX_ApdbProtoVisits_vTime', 'visitTime', info=info),
335 mysql_engine=mysql_engine,
336 info=info)
337 self.visits = table
339 def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
340 """Create or re-create all tables.
342 Parameters
343 ----------
344 drop : `bool`, optional
345 If True then drop tables before creating new ones.
346 mysql_engine : `str`, optional
347 MySQL engine type to use for new tables.
348 oracle_tablespace : `str`, optional
349 Name of Oracle tablespace, only useful with oracle
350 oracle_iot : `bool`, optional
351 Make Index-organized DiaObjectLast table.
352 """
354 # re-make table schema for all needed tables with possibly different options
355 _LOG.debug("clear metadata")
356 self._metadata.clear()
357 _LOG.debug("re-do schema mysql_engine=%r oracle_tablespace=%r",
358 mysql_engine, oracle_tablespace)
359 self._makeTables(mysql_engine=mysql_engine, oracle_tablespace=oracle_tablespace,
360 oracle_iot=oracle_iot)
362 # create all tables (optionally drop first)
363 if drop:
364 _LOG.info('dropping all tables')
365 self._metadata.drop_all()
366 _LOG.info('creating all tables')
367 self._metadata.create_all()
369 def getAfwSchema(self, table_name, columns=None):
370 """Return afw schema for given table.
372 Parameters
373 ----------
374 table_name : `str`
375 One of known APDB table names.
376 columns : `list` of `str`, optional
377 Include only given table columns in schema, by default all columns
378 are included.
380 Returns
381 -------
382 schema : `lsst.afw.table.Schema`
383 column_map : `dict`
384 Mapping of the table/result column names into schema key.
385 """
387 table = self._schemas[table_name]
388 col_map = self._column_map.get(table_name, {})
390 # make a schema
391 col2afw = {}
392 schema = afwTable.SourceTable.makeMinimalSchema()
393 for column in table.columns:
394 if columns and column.name not in columns:
395 continue
396 afw_col = col_map.get(column.name, column.name)
397 if afw_col in schema.getNames():
398 # Continue if the column is already in the minimal schema.
399 key = schema.find(afw_col).getKey()
400 elif column.type in ("DOUBLE", "FLOAT") and column.unit == "deg":
401 #
402 # NOTE: degree to radian conversion is not supported (yet)
403 #
404 # angles in afw are radians and have special "Angle" type
405 key = schema.addField(afw_col,
406 type="Angle",
407 doc=column.description or "",
408 units="rad")
409 elif column.type == "BLOB":
410 # No BLOB support for now
411 key = None
412 else:
413 units = column.unit or ""
414 # some units in schema are not recognized by afw but we do not care
415 if self._afw_type_map_reverse[column.type] == 'String':
416 key = schema.addField(afw_col,
417 type=self._afw_type_map_reverse[column.type],
418 doc=column.description or "",
419 units=units,
420 parse_strict="silent",
421 size=10)
422 elif units == "deg":
423 key = schema.addField(afw_col,
424 type='Angle',
425 doc=column.description or "",
426 parse_strict="silent")
427 else:
428 key = schema.addField(afw_col,
429 type=self._afw_type_map_reverse[column.type],
430 doc=column.description or "",
431 units=units,
432 parse_strict="silent")
433 col2afw[column.name] = key
435 return schema, col2afw
437 def getAfwColumns(self, table_name):
438 """Returns mapping of afw column names to Column definitions.
440 Parameters
441 ----------
442 table_name : `str`
443 One of known APDB table names.
445 Returns
446 -------
447 column_map : `dict`
448 Mapping of afw column names to `ColumnDef` instances.
449 """
450 table = self._schemas[table_name]
451 col_map = self._column_map.get(table_name, {})
453 cmap = {}
454 for column in table.columns:
455 afw_name = col_map.get(column.name, column.name)
456 cmap[afw_name] = column
457 return cmap
459 def getColumnMap(self, table_name):
460 """Returns mapping of column names to Column definitions.
462 Parameters
463 ----------
464 table_name : `str`
465 One of known APDB table names.
467 Returns
468 -------
469 column_map : `dict`
470 Mapping of column names to `ColumnDef` instances.
471 """
472 table = self._schemas[table_name]
473 cmap = {column.name: column for column in table.columns}
474 return cmap
476 def _buildSchemas(self, schema_file, extra_schema_file=None, afw_schemas=None):
477 """Create schema definitions for all tables.
479 Reads YAML schemas and builds dictionary containing `TableDef`
480 instances for each table.
482 Parameters
483 ----------
484 schema_file : `str`
485 Name of YAML file with standard cat schema.
486 extra_schema_file : `str`, optional
487 Name of YAML file with extra table information or `None`.
488 afw_schemas : `dict`, optional
489 Dictionary with table name for a key and `afw.table.Schema`
490 for a value. Columns in schema will be added to standard APDB
491 schema (only if standard schema does not have matching column).
493 Returns
494 -------
495 schemas : `dict`
496 Mapping of table names to `TableDef` instances.
497 """
499 schema_file = os.path.expandvars(schema_file)
500 _LOG.debug("Reading schema file %s", schema_file)
501 with open(schema_file) as yaml_stream:
502 tables = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
503 # index it by table name
504 _LOG.debug("Read %d tables from schema", len(tables))
506 if extra_schema_file:
507 extra_schema_file = os.path.expandvars(extra_schema_file)
508 _LOG.debug("Reading extra schema file %s", extra_schema_file)
509 with open(extra_schema_file) as yaml_stream:
510 extras = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
511 # index it by table name
512 schemas_extra = {table['table']: table for table in extras}
513 else:
514 schemas_extra = {}
516 # merge extra schema into a regular schema, for now only columns are merged
517 for table in tables:
518 table_name = table['table']
519 if table_name in schemas_extra:
520 columns = table['columns']
521 extra_columns = schemas_extra[table_name].get('columns', [])
522 extra_columns = {col['name']: col for col in extra_columns}
523 _LOG.debug("Extra columns for table %s: %s", table_name, extra_columns.keys())
524 columns = []
525 for col in table['columns']:
526 if col['name'] in extra_columns:
527 columns.append(extra_columns.pop(col['name']))
528 else:
529 columns.append(col)
530 # add all remaining extra columns
531 table['columns'] = columns + list(extra_columns.values())
533 if 'indices' in schemas_extra[table_name]:
534 raise RuntimeError("Extra table definition contains indices, "
535 "merging is not implemented")
537 del schemas_extra[table_name]
539 # Pure "extra" table definitions may contain indices
540 tables += schemas_extra.values()
542 # convert all dicts into named tuples
543 schemas = {}
544 for table in tables:
546 columns = table.get('columns', [])
548 table_name = table['table']
549 afw_schema = afw_schemas and afw_schemas.get(table_name)
550 if afw_schema:
551 # use afw schema to create extra columns
552 column_names = {col['name'] for col in columns}
553 column_names_lower = {col.lower() for col in column_names}
554 for _, field in afw_schema:
555 column = self._field2dict(field, table_name)
556 if column['name'] not in column_names:
557 # check that there is no column name that only differs in case
558 if column['name'].lower() in column_names_lower:
559 raise ValueError("afw.table column name case does not match schema column name")
560 columns.append(column)
562 table_columns = []
563 for col in columns:
564 # For prototype set default to 0 even if columns don't specify it
565 if "default" not in col:
566 default = None
567 if col['type'] not in ("BLOB", "DATETIME"):
568 default = 0
569 else:
570 default = col["default"]
572 column = ColumnDef(name=col['name'],
573 type=col['type'],
574 nullable=col.get("nullable"),
575 default=default,
576 description=col.get("description"),
577 unit=col.get("unit"),
578 ucd=col.get("ucd"))
579 table_columns.append(column)
581 table_indices = []
582 for idx in table.get('indices', []):
583 index = IndexDef(name=idx.get('name'),
584 type=idx.get('type'),
585 columns=idx.get('columns'))
586 table_indices.append(index)
588 schemas[table_name] = TableDef(name=table_name,
589 description=table.get('description'),
590 columns=table_columns,
591 indices=table_indices)
593 return schemas
595 def _tableColumns(self, table_name):
596 """Return set of columns in a table
598 Parameters
599 ----------
600 table_name : `str`
601 Name of the table.
603 Returns
604 -------
605 column_defs : `list`
606 List of `Column` objects.
607 """
609 # get the list of columns in primary key, they are treated somewhat
610 # specially below
611 table_schema = self._schemas[table_name]
612 pkey_columns = set()
613 for index in table_schema.indices:
614 if index.type == 'PRIMARY':
615 pkey_columns = set(index.columns)
616 break
618 # convert all column dicts into alchemy Columns
619 column_defs = []
620 for column in table_schema.columns:
621 kwargs = dict(nullable=column.nullable)
622 if column.default is not None:
623 kwargs.update(server_default=str(column.default))
624 if column.name in pkey_columns:
625 kwargs.update(autoincrement=False)
626 ctype = self._type_map[column.type]
627 column_defs.append(Column(column.name, ctype, **kwargs))
629 return column_defs
631 def _field2dict(self, field, table_name):
632 """Convert afw schema field definition into a dict format.
634 Parameters
635 ----------
636 field : `lsst.afw.table.Field`
637 Field in afw table schema.
638 table_name : `str`
639 Name of the table.
641 Returns
642 -------
643 field_dict : `dict`
644 Field attributes for SQL schema:
646 - ``name`` : field name (`str`)
647 - ``type`` : type name in SQL, e.g. "INT", "FLOAT" (`str`)
648 - ``nullable`` : `True` if column can be ``NULL`` (`bool`)
649 """
650 column = field.getName()
651 column = self._column_map_reverse[table_name].get(column, column)
652 ctype = self._afw_type_map[field.getTypeString()]
653 return dict(name=column, type=ctype, nullable=True)
655 def _tableIndices(self, table_name, info):
656 """Return set of constraints/indices in a table
658 Parameters
659 ----------
660 table_name : `str`
661 Name of the table.
662 info : `dict`
663 Additional options passed to SQLAlchemy index constructor.
665 Returns
666 -------
667 index_defs : `list`
668 List of SQLAlchemy index/constraint objects.
669 """
671 table_schema = self._schemas[table_name]
673 # convert all index dicts into alchemy Columns
674 index_defs = []
675 for index in table_schema.indices:
676 if index.type == "INDEX":
677 index_defs.append(Index(self._prefix+index.name, *index.columns, info=info))
678 else:
679 kwargs = {}
680 if index.name:
681 kwargs['name'] = self._prefix+index.name
682 if index.type == "PRIMARY":
683 index_defs.append(PrimaryKeyConstraint(*index.columns, **kwargs))
684 elif index.type == "UNIQUE":
685 index_defs.append(UniqueConstraint(*index.columns, **kwargs))
687 return index_defs
689 def _getDoubleType(self):
690 """DOUBLE type is database-specific, select one based on dialect.
692 Returns
693 -------
694 type_object : `object`
695 Database-specific type definition.
696 """
697 if self._engine.name == 'mysql':
698 from sqlalchemy.dialects.mysql import DOUBLE
699 return DOUBLE(asdecimal=False)
700 elif self._engine.name == 'postgresql':
701 from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
702 return DOUBLE_PRECISION
703 elif self._engine.name == 'oracle':
704 from sqlalchemy.dialects.oracle import DOUBLE_PRECISION
705 return DOUBLE_PRECISION
706 elif self._engine.name == 'sqlite':
707 # all floats in sqlite are 8-byte
708 from sqlalchemy.dialects.sqlite import REAL
709 return REAL
710 else:
711 raise TypeError('cannot determine DOUBLE type, unexpected dialect: ' + self._engine.name)