Coverage for python/lsst/dax/apdb/apdbSchema.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module responsible for APDB schema operations.
23"""
25__all__ = ["ColumnDef", "IndexDef", "TableDef",
26 "make_minimal_dia_object_schema", "make_minimal_dia_source_schema",
27 "ApdbSchema"]
29from collections import namedtuple
30import logging
31import yaml
33import sqlalchemy
34from sqlalchemy import (Column, Index, MetaData, PrimaryKeyConstraint,
35 UniqueConstraint, Table)
36from sqlalchemy.schema import CreateTable, CreateIndex
37from sqlalchemy.ext.compiler import compiles
38import lsst.afw.table as afwTable
41_LOG = logging.getLogger(__name__.partition(".")[2]) # strip leading "lsst."
# Named tuples describing the parsed YAML schema.

# Column description:
#   name        : column name
#   type        : name of cat type (INT, FLOAT, etc.)
#   nullable    : True or False
#   default     : default value for column, can be None
#   description : documentation, can be None or empty
#   unit        : string with unit name, can be None
#   ucd         : string with ucd, can be None
ColumnDef = namedtuple(
    'ColumnDef',
    ['name', 'type', 'nullable', 'default', 'description', 'unit', 'ucd'])

# Index description:
#   name    : index name, can be None or empty
#   type    : one of "PRIMARY", "UNIQUE", "INDEX"
#   columns : list of column names in index
IndexDef = namedtuple('IndexDef', ['name', 'type', 'columns'])

# Table description:
#   name        : table name
#   description : documentation, can be None or empty
#   columns     : list of ColumnDef instances
#   indices     : list of IndexDef instances, can be empty or None
TableDef = namedtuple('TableDef', ['name', 'description', 'columns', 'indices'])
def make_minimal_dia_object_schema():
    """Define and create the minimal schema required for a DIAObject.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Minimal schema for DIAObjects.
    """
    # Start from the standard afw minimal source schema and add the two
    # columns the APDB prototype needs for DIAObjects.
    schema = afwTable.SourceTable.makeMinimalSchema()
    schema.addField("pixelId", type='L',
                    doc='Unique spherical pixelization identifier.')
    # NOTE(review): this field has no ``doc`` string; presumably it counts
    # DIASources associated with the object -- confirm before documenting.
    schema.addField("nDiaSources", type='L')
    return schema
def make_minimal_dia_source_schema():
    """Define and create the minimal schema required for a DIASource.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Minimal schema for DIASources.
    """
    # Start from the standard afw minimal source schema and add the
    # columns the APDB prototype needs for DIASources.
    schema = afwTable.SourceTable.makeMinimalSchema()
    schema.addField("diaObjectId", type='L',
                    doc='Unique identifier of the DIAObject this source is '
                        'associated to.')
    schema.addField("ccdVisitId", type='L',
                    doc='Id of the exposure and ccd this object was detected '
                        'in.')
    schema.addField("psFlux", type='D',
                    doc='Calibrated PSF flux of this source.')
    schema.addField("psFluxErr", type='D',
                    doc='Calibrated PSF flux err of this source.')
    schema.addField("flags", type='L',
                    doc='Quality flags for this DIASource.')
    schema.addField("pixelId", type='L',
                    doc='Unique spherical pixelization identifier.')
    return schema
@compiles(CreateTable, "oracle")
def _add_suffixes_tbl(element, compiler, **kw):
    """Append Oracle-specific suffixes to a CREATE TABLE statement.

    Special compilation method for the ``CreateTable`` clause which
    registers itself with SQLAlchemy via the ``@compiles`` decorator; the
    exact function name does not matter.  A client can pass a dict as the
    ``info`` keyword argument of the ``Table`` constructor.  If the dict
    has a key ``"oracle_tablespace"`` its value is used as the tablespace
    name; if it has a key ``"oracle_iot"`` with a true value then an IOT
    table is created.  The extra clauses generated here specify the
    tablespace name and "ORGANIZATION INDEX" for IOT.

    .. seealso:: https://docs.sqlalchemy.org/en/latest/core/compiler.html
    """
    statement = compiler.visit_create_table(element, **kw)
    _LOG.debug("text: %r", statement)
    table_info = element.element.info
    tablespace = table_info.get("oracle_tablespace")
    iot = table_info.get("oracle_iot", False)
    _LOG.debug("oracle_tablespace: %r", tablespace)
    suffixes = []
    if iot:
        suffixes.append(" ORGANIZATION INDEX")
    if tablespace:
        suffixes.append(" TABLESPACE " + tablespace)
    statement += "".join(suffixes)
    _LOG.debug("text: %r", statement)
    return statement
@compiles(CreateIndex, "oracle")
def _add_suffixes_idx(element, compiler, **kw):
    """Append Oracle-specific suffixes to a CREATE INDEX statement.

    Special compilation method for the ``CreateIndex`` clause which
    registers itself with SQLAlchemy via the ``@compiles`` decorator; the
    exact function name does not matter.  A client can pass a dict as the
    ``info`` keyword argument of the ``Index`` constructor.  If the dict
    has a key ``"oracle_tablespace"`` its value is used as the tablespace
    name, and the extra clause generated here specifies that tablespace.

    .. seealso:: https://docs.sqlalchemy.org/en/latest/core/compiler.html
    """
    statement = compiler.visit_create_index(element, **kw)
    _LOG.debug("text: %r", statement)
    tablespace = element.element.info.get("oracle_tablespace")
    _LOG.debug("oracle_tablespace: %r", tablespace)
    if tablespace:
        statement = statement + " TABLESPACE " + tablespace
    _LOG.debug("text: %r", statement)
    return statement
class ApdbSchema(object):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_nightly : `sqlalchemy.Table`
        DiaObjectNightly table instance, may be None
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    visits : `sqlalchemy.Table`
        ApdbProtoVisits table instance

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbConfig.dia_object_index`
        for details.
    dia_object_nightly : `bool`
        If `True` then create per-night DiaObject table as well.
    schema_file : `str`
        Name of the YAML schema file.
    extra_schema_file : `str`, optional
        Name of the YAML schema file with extra column definitions.
    column_map : `str`, optional
        Name of the YAML file with column mappings.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard APDB
        schema (only if standard schema does not have matching column).
    prefix : `str`, optional
        Prefix to add to all schema elements.
    """

    # map afw type names into cat type names
    _afw_type_map = {"I": "INT",
                     "L": "BIGINT",
                     "F": "FLOAT",
                     "D": "DOUBLE",
                     "Angle": "DOUBLE",
                     "String": "CHAR",
                     "Flag": "BOOL"}
    # map cat type names back to afw type names; not a strict inverse of
    # the above (DATETIME also maps to "L", "Angle" has no entry here)
    _afw_type_map_reverse = {"INT": "I",
                             "BIGINT": "L",
                             "FLOAT": "F",
                             "DOUBLE": "D",
                             "DATETIME": "L",
                             "CHAR": "String",
                             "BOOL": "Flag"}

    def __init__(self, engine, dia_object_index, dia_object_nightly,
                 schema_file, extra_schema_file=None, column_map=None,
                 afw_schemas=None, prefix=""):

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._dia_object_nightly = dia_object_nightly
        self._prefix = prefix

        self._metadata = MetaData(self._engine)

        # Table instances; filled by _makeTables() below.
        self.objects = None
        self.objects_nightly = None
        self.objects_last = None
        self.sources = None
        self.forcedSources = None
        self.visits = None

        if column_map:
            _LOG.debug("Reading column map file %s", column_map)
            with open(column_map) as yaml_stream:
                # maps cat column name to afw column name
                self._column_map = yaml.load(yaml_stream, Loader=yaml.SafeLoader)
            _LOG.debug("column map: %s", self._column_map)
        else:
            _LOG.debug("No column map file is given, initialize to empty")
            self._column_map = {}
        self._column_map_reverse = {}
        for table, cmap in self._column_map.items():
            # maps afw column name to cat column name
            self._column_map_reverse[table] = {v: k for k, v in cmap.items()}
        _LOG.debug("reverse column map: %s", self._column_map_reverse)

        # build complete table schema
        self._schemas = self._buildSchemas(schema_file, extra_schema_file,
                                           afw_schemas)

        # map cat column types to alchemy
        self._type_map = dict(DOUBLE=self._getDoubleType(),
                              FLOAT=sqlalchemy.types.Float,
                              DATETIME=sqlalchemy.types.TIMESTAMP,
                              BIGINT=sqlalchemy.types.BigInteger,
                              INTEGER=sqlalchemy.types.Integer,
                              INT=sqlalchemy.types.Integer,
                              TINYINT=sqlalchemy.types.Integer,
                              BLOB=sqlalchemy.types.LargeBinary,
                              CHAR=sqlalchemy.types.CHAR,
                              BOOL=sqlalchemy.types.Boolean)

        # generate schema for all tables, must be called last
        self._makeTables()

    def _makeTables(self, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Generate schema for all tables.

        Populates the ``objects``, ``objects_nightly``, ``objects_last``,
        ``sources``, ``forcedSources``, and ``visits`` attributes with
        `sqlalchemy.Table` instances registered in ``self._metadata``.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        # picked up by the Oracle-specific @compiles hooks above
        info = dict(oracle_tablespace=oracle_tablespace)

        if self._dia_object_index == 'pix_id_iov':
            # Special PK with HTM column in first position
            constraints = self._tableIndices('DiaObjectIndexHtmFirst', info)
        else:
            constraints = self._tableIndices('DiaObject', info)
        table = Table(self._prefix+'DiaObject', self._metadata,
                      *(self._tableColumns('DiaObject') + constraints),
                      mysql_engine=mysql_engine,
                      info=info)
        self.objects = table

        if self._dia_object_nightly:
            # Same as DiaObject but no index
            table = Table(self._prefix+'DiaObjectNightly', self._metadata,
                          *self._tableColumns('DiaObject'),
                          mysql_engine=mysql_engine,
                          info=info)
            self.objects_nightly = table

        if self._dia_object_index == 'last_object_table':
            # Same as DiaObject but with special index
            info2 = info.copy()
            info2.update(oracle_iot=oracle_iot)
            # NOTE(review): the indices get the plain ``info`` while the
            # table gets ``info2`` (with oracle_iot) -- looks intentional
            # since IOT applies to the table, but worth confirming.
            table = Table(self._prefix+'DiaObjectLast', self._metadata,
                          *(self._tableColumns('DiaObjectLast')
                            + self._tableIndices('DiaObjectLast', info)),
                          mysql_engine=mysql_engine,
                          info=info2)
            self.objects_last = table

        # for all other tables use index definitions in schema
        for table_name in ('DiaSource', 'SSObject', 'DiaForcedSource', 'DiaObject_To_Object_Match'):
            table = Table(self._prefix+table_name, self._metadata,
                          *(self._tableColumns(table_name)
                            + self._tableIndices(table_name, info)),
                          mysql_engine=mysql_engine,
                          info=info)
            if table_name == 'DiaSource':
                self.sources = table
            elif table_name == 'DiaForcedSource':
                self.forcedSources = table

        # special table to track visits, only used by prototype
        table = Table(self._prefix+'ApdbProtoVisits', self._metadata,
                      Column('visitId', sqlalchemy.types.BigInteger, nullable=False),
                      Column('visitTime', sqlalchemy.types.TIMESTAMP, nullable=False),
                      PrimaryKeyConstraint('visitId', name=self._prefix+'PK_ApdbProtoVisits'),
                      Index(self._prefix+'IDX_ApdbProtoVisits_vTime', 'visitTime', info=info),
                      mysql_engine=mysql_engine,
                      info=info)
        self.visits = table

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        # re-make table schema for all needed tables with possibly different options
        _LOG.debug("clear metadata")
        self._metadata.clear()
        _LOG.debug("re-do schema mysql_engine=%r oracle_tablespace=%r",
                   mysql_engine, oracle_tablespace)
        self._makeTables(mysql_engine=mysql_engine, oracle_tablespace=oracle_tablespace,
                         oracle_iot=oracle_iot)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info('dropping all tables')
            self._metadata.drop_all()
        _LOG.info('creating all tables')
        self._metadata.create_all()

    def getAfwSchema(self, table_name, columns=None):
        """Return afw schema for given table.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.
        columns : `list` of `str`, optional
            Include only given table columns in schema, by default all columns
            are included.

        Returns
        -------
        schema : `lsst.afw.table.Schema`
        column_map : `dict`
            Mapping of the table/result column names into schema key.
        """

        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        # make a schema, starting from the afw minimal source schema
        col2afw = {}
        schema = afwTable.SourceTable.makeMinimalSchema()
        for column in table.columns:
            if columns and column.name not in columns:
                continue
            afw_col = col_map.get(column.name, column.name)
            if afw_col in schema.getNames():
                # Continue if the column is already in the minimal schema.
                key = schema.find(afw_col).getKey()
            elif column.type in ("DOUBLE", "FLOAT") and column.unit == "deg":
                #
                # NOTE: degree to radian conversion is not supported (yet)
                #
                # angles in afw are radians and have special "Angle" type
                key = schema.addField(afw_col,
                                      type="Angle",
                                      doc=column.description or "",
                                      units="rad")
            elif column.type == "BLOB":
                # No BLOB support for now; such columns get no schema key
                key = None
            else:
                units = column.unit or ""
                # some units in schema are not recognized by afw but we do not care
                if self._afw_type_map_reverse[column.type] == 'String':
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent",
                                          size=10)
                elif units == "deg":
                    key = schema.addField(afw_col,
                                          type='Angle',
                                          doc=column.description or "",
                                          parse_strict="silent")
                else:
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent")
            col2afw[column.name] = key

        return schema, col2afw

    def getAfwColumns(self, table_name):
        """Returns mapping of afw column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of afw column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        cmap = {}
        for column in table.columns:
            # fall back to the cat name when no afw mapping exists
            afw_name = col_map.get(column.name, column.name)
            cmap[afw_name] = column
        return cmap

    def getColumnMap(self, table_name):
        """Returns mapping of column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        cmap = {column.name: column for column in table.columns}
        return cmap

    def _buildSchemas(self, schema_file, extra_schema_file=None, afw_schemas=None):
        """Create schema definitions for all tables.

        Reads YAML schemas and builds dictionary containing `TableDef`
        instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with standard cat schema.
        extra_schema_file : `str`, optional
            Name of YAML file with extra table information or `None`.
        afw_schemas : `dict`, optional
            Dictionary with table name for a key and `afw.table.Schema`
            for a value. Columns in schema will be added to standard APDB
            schema (only if standard schema does not have matching column).

        Returns
        -------
        schemas : `dict`
            Mapping of table names to `TableDef` instances.
        """

        _LOG.debug("Reading schema file %s", schema_file)
        with open(schema_file) as yaml_stream:
            tables = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
        _LOG.debug("Read %d tables from schema", len(tables))

        if extra_schema_file:
            _LOG.debug("Reading extra schema file %s", extra_schema_file)
            with open(extra_schema_file) as yaml_stream:
                extras = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
                # index it by table name
                schemas_extra = {table['table']: table for table in extras}
        else:
            schemas_extra = {}

        # merge extra schema into a regular schema, for now only columns are merged
        for table in tables:
            table_name = table['table']
            if table_name in schemas_extra:
                # NOTE(review): this assignment is dead -- ``columns`` is
                # rebuilt as an empty list two statements below.
                columns = table['columns']
                extra_columns = schemas_extra[table_name].get('columns', [])
                extra_columns = {col['name']: col for col in extra_columns}
                _LOG.debug("Extra columns for table %s: %s", table_name, extra_columns.keys())
                columns = []
                # extra definition overrides the standard one for a column
                # with the same name
                for col in table['columns']:
                    if col['name'] in extra_columns:
                        columns.append(extra_columns.pop(col['name']))
                    else:
                        columns.append(col)
                # add all remaining extra columns
                table['columns'] = columns + list(extra_columns.values())

                if 'indices' in schemas_extra[table_name]:
                    raise RuntimeError("Extra table definition contains indices, "
                                      "merging is not implemented")

                del schemas_extra[table_name]

        # Pure "extra" table definitions may contain indices
        tables += schemas_extra.values()

        # convert all dicts into named tuples
        schemas = {}
        for table in tables:

            columns = table.get('columns', [])

            table_name = table['table']
            afw_schema = afw_schemas and afw_schemas.get(table_name)
            if afw_schema:
                # use afw schema to create extra columns
                column_names = {col['name'] for col in columns}
                column_names_lower = {col.lower() for col in column_names}
                for _, field in afw_schema:
                    column = self._field2dict(field, table_name)
                    if column['name'] not in column_names:
                        # check that there is no column name that only differs in case
                        if column['name'].lower() in column_names_lower:
                            raise ValueError("afw.table column name case does not match schema column name")
                        columns.append(column)

            table_columns = []
            for col in columns:
                # For prototype set default to 0 even if columns don't specify it
                if "default" not in col:
                    default = None
                    if col['type'] not in ("BLOB", "DATETIME"):
                        default = 0
                else:
                    default = col["default"]

                column = ColumnDef(name=col['name'],
                                   type=col['type'],
                                   nullable=col.get("nullable"),
                                   default=default,
                                   description=col.get("description"),
                                   unit=col.get("unit"),
                                   ucd=col.get("ucd"))
                table_columns.append(column)

            table_indices = []
            for idx in table.get('indices', []):
                index = IndexDef(name=idx.get('name'),
                                 type=idx.get('type'),
                                 columns=idx.get('columns'))
                table_indices.append(index)

            schemas[table_name] = TableDef(name=table_name,
                                           description=table.get('description'),
                                           columns=table_columns,
                                           indices=table_indices)

        return schemas

    def _tableColumns(self, table_name):
        """Return set of columns in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """

        # get the list of columns in primary key, they are treated somewhat
        # specially below
        table_schema = self._schemas[table_name]
        pkey_columns = set()
        for index in table_schema.indices:
            if index.type == 'PRIMARY':
                pkey_columns = set(index.columns)
                break

        # convert all column dicts into alchemy Columns
        column_defs = []
        for column in table_schema.columns:
            kwargs = dict(nullable=column.nullable)
            if column.default is not None:
                kwargs.update(server_default=str(column.default))
            if column.name in pkey_columns:
                # PK columns must not auto-increment in this schema
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.type]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _field2dict(self, field, table_name):
        """Convert afw schema field definition into a dict format.

        Parameters
        ----------
        field : `lsst.afw.table.Field`
            Field in afw table schema.
        table_name : `str`
            Name of the table.

        Returns
        -------
        field_dict : `dict`
            Field attributes for SQL schema:

            - ``name`` : field name (`str`)
            - ``type`` : type name in SQL, e.g. "INT", "FLOAT" (`str`)
            - ``nullable`` : `True` if column can be ``NULL`` (`bool`)
        """
        column = field.getName()
        # translate afw name back to cat name when a mapping exists
        column = self._column_map_reverse[table_name].get(column, column)
        ctype = self._afw_type_map[field.getTypeString()]
        return dict(name=column, type=ctype, nullable=True)

    def _tableIndices(self, table_name, info):
        """Return set of constraints/indices in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.
        info : `dict`
            Additional options passed to SQLAlchemy index constructor.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """

        table_schema = self._schemas[table_name]

        # convert all index dicts into alchemy Columns
        index_defs = []
        for index in table_schema.indices:
            if index.type == "INDEX":
                index_defs.append(Index(self._prefix+index.name, *index.columns, info=info))
            else:
                kwargs = {}
                if index.name:
                    kwargs['name'] = self._prefix+index.name
                if index.type == "PRIMARY":
                    index_defs.append(PrimaryKeyConstraint(*index.columns, **kwargs))
                elif index.type == "UNIQUE":
                    index_defs.append(UniqueConstraint(*index.columns, **kwargs))

        return index_defs

    def _getDoubleType(self):
        """DOUBLE type is database-specific, select one based on dialect.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.
        """
        if self._engine.name == 'mysql':
            from sqlalchemy.dialects.mysql import DOUBLE
            return DOUBLE(asdecimal=False)
        elif self._engine.name == 'postgresql':
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'oracle':
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'sqlite':
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL
            return REAL
        else:
            raise TypeError('cannot determine DOUBLE type, unexpected dialect: ' + self._engine.name)