Coverage for python/lsst/dax/apdb/apdbSchema.py: 48%
70 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 11:36 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 11:36 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module containing methods and classes for generic APDB schema operations.
24The code in this module is independent of the specific technology used to
25implement APDB.
26"""
28from __future__ import annotations
30__all__ = ["ApdbTables", "ApdbSchema"]
32import enum
33import logging
34import os
35from collections.abc import Mapping, MutableMapping
37import felis.types
38import numpy
39import yaml
40from felis import DEFAULT_FRAME
41from felis.simple import SimpleVisitor, Table
43from .versionTuple import VersionTuple
45_LOG = logging.getLogger(__name__)
# Lookup table from Felis column type to the dtype used for a Pandas column.
# Usually the Cassandra driver determines column types on its own; this map
# is only needed for the cases where a Pandas DataFrame is constructed
# directly and column types have to be inferred from the YAML schema.
_dtype_map: Mapping[type[felis.types.FelisType], type | str] = {
    # Floating point types.
    felis.types.Double: numpy.float64,
    felis.types.Float: numpy.float32,
    # Timestamps are given as a dtype string rather than a type object.
    felis.types.Timestamp: "datetime64[ms]",
    # Integer types, widest first.
    felis.types.Long: numpy.int64,
    felis.types.Int: numpy.int32,
    felis.types.Short: numpy.int16,
    felis.types.Byte: numpy.int8,
    # Binary and all string-like types map to generic Python objects.
    **dict.fromkeys(
        (
            felis.types.Binary,
            felis.types.Char,
            felis.types.Text,
            felis.types.String,
            felis.types.Unicode,
        ),
        object,
    ),
    felis.types.Boolean: bool,
}
@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the tables that make up APDB schema.

    The value of each member is the un-prefixed name of the table.
    """

    DiaObject = "DiaObject"
    """Table holding DIAObject records."""

    DiaSource = "DiaSource"
    """Table holding DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Table holding DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Table holding the last version of DIAObject records.

    Some implementations may treat this table as optional.
    """

    SSObject = "SSObject"
    """Table holding SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Table holding DiaObject_To_Object_Match records."""

    metadata = "metadata"
    """Metadata table, which may not always exist."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name.

        Parameters
        ----------
        prefix : `str`, optional
            String prepended to the table name, empty by default.

        Returns
        -------
        name : `str`
            The prefix immediately followed by the value of this member.
        """
        base_name = self.value
        return prefix + base_name
class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps `ApdbTables` enum value to `felis.simple.Table` instance.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ):
        # build complete table schema
        self.tableSchemas, self._schemaVersion = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: type[felis.types.FelisType]) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `type`
            Felis type, one of the classes defined in `felis.types` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError adds no information beyond the message below, so
            # suppress it from the traceback.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def schemaVersion(self) -> VersionTuple:
        """Return schema version as defined in YAML schema file.

        Returns
        -------
        version : `VersionTuple`
            Version number read from YAML file, if YAML file does not define
            schema version then "0.1.0" is returned.
        """
        if self._schemaVersion is None:
            return VersionTuple(0, 1, 0)
        return self._schemaVersion

    @classmethod
    def _buildSchemas(
        cls, schema_file: str, schema_name: str = "ApdbSchema"
    ) -> tuple[Mapping[ApdbTables, Table], VersionTuple | None]:
        """Create schema definitions for all tables.

        Reads YAML schema and builds a dictionary containing
        `felis.simple.Table` instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema, environment variables in
            the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        tables : `dict`
            Mapping of `ApdbTables` enum values to `felis.simple.Table`
            instances.
        version : `VersionTuple` or `None`
            Schema version defined in schema file, `None` if version is not
            defined.

        Raises
        ------
        ValueError
            Raised if the file does not define a schema with the requested
            name, or defines more than one.
        """
        schema_file = os.path.expandvars(schema_file)
        with open(schema_file) as yaml_stream:
            # ``load_all`` is lazy, so documents have to be consumed while the
            # file is still open. Empty YAML documents are loaded as `None`
            # and documents may also be lists or scalars, only mappings can
            # define a schema so everything else is skipped.
            schemas_list = [
                document
                for document in yaml.load_all(yaml_stream, Loader=yaml.SafeLoader)
                if isinstance(document, dict) and document.get("name") == schema_name
            ]
        if not schemas_list:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        elif len(schemas_list) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")
        schema_dict = schemas_list[0]
        schema_dict.update(DEFAULT_FRAME)
        visitor = SimpleVisitor()
        schema = visitor.visit_schema(schema_dict)

        # convert all dicts into classes
        tables: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            else:
                tables[table_enum] = table

        version: VersionTuple | None = None
        if schema.version is not None:
            version = VersionTuple.fromString(schema.version.current)

        return tables, version