Coverage for python/lsst/dax/apdb/apdbSchema.py: 50%

60 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-11 02:40 -0800

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""This module contains methods and classes for generic APDB schema operations. 

23 

24The code in this module is independent of the specific technology used to 

25implement APDB. 

26""" 

27 

28from __future__ import annotations 

29 

30__all__ = ["ApdbTables", "ApdbSchema"] 

31 

32import enum 

33import logging 

34import os 

35from collections.abc import Mapping, MutableMapping 

36from typing import Type, Union 

37 

38import felis.types 

39import numpy 

40import yaml 

41from felis import DEFAULT_FRAME 

42from felis.simple import SimpleVisitor, Table 

43 

44_LOG = logging.getLogger(__name__) 

45 

# Lookup table from Felis column type to the dtype used when a Pandas
# DataFrame has to be built by hand. Usually the Cassandra driver decides the
# column types; this map is the fallback that derives them from the YAML
# schema instead.
_dtype_map: Mapping[Type[felis.types.FelisType], Union[Type, str]] = {
    # Floating point and timestamp columns.
    felis.types.Double: numpy.float64,
    felis.types.Float: numpy.float32,
    felis.types.Timestamp: "datetime64[ms]",
    # Integer columns, widest first.
    felis.types.Long: numpy.int64,
    felis.types.Int: numpy.int32,
    felis.types.Short: numpy.int16,
    felis.types.Byte: numpy.int8,
    # Binary and all textual columns are stored as generic Python objects.
    **{
        object_type: object
        for object_type in (
            felis.types.Binary,
            felis.types.Char,
            felis.types.Text,
            felis.types.String,
            felis.types.Unicode,
        )
    },
    felis.types.Boolean: bool,
}

64 

65 

@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the table names used by the APDB schema.

    The value of each member is the bare table name as a string.
    """

    DiaObject = "DiaObject"
    """Table holding DIAObject records."""

    DiaSource = "DiaSource"
    """Table holding DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Table holding DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Table holding the last version of DIAObject records.

    Some implementations may treat this table as optional.
    """

    SSObject = "SSObject"
    """Table holding SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Table holding DiaObject_To_Object_Match records."""

    def table_name(self, prefix: str = "") -> str:
        """Return the full table name, i.e. ``prefix`` followed by the
        member value.

        Parameters
        ----------
        prefix : `str`, optional
            String prepended to the bare table name; empty by default.

        Returns
        -------
        name : `str`
            Concatenation of ``prefix`` and the member value.
        """
        bare_name = self.value
        return prefix + bare_name

94 

95 

96class ApdbSchema: 

97 """Class for management of APDB schema. 

98 

99 Attributes 

100 ---------- 

101 tableSchemas : `dict` 

102 Maps table name to `TableDef` instance. 

103 

104 Parameters 

105 ---------- 

106 schema_file : `str` 

107 Name of the YAML schema file. 

108 schema_name : `str`, optional 

109 Name of the schema in YAML files. 

110 """ 

111 

112 def __init__( 

113 self, 

114 schema_file: str, 

115 schema_name: str = "ApdbSchema", 

116 ): 

117 # build complete table schema 

118 self.tableSchemas = self._buildSchemas(schema_file, schema_name) 

119 

120 def column_dtype(self, felis_type: Type[felis.types.FelisType]) -> Union[type, str]: 

121 """Return Pandas data type for a given Felis column type. 

122 

123 Parameters 

124 ---------- 

125 felis_type : `type` 

126 Felis type, on of the classes defined in `felis.types` module. 

127 

128 Returns 

129 ------- 

130 column_dtype : `type` or `str` 

131 Type that can be used for columns in Pandas. 

132 

133 Raises 

134 ------ 

135 TypeError 

136 Raised if type is cannot be handled. 

137 """ 

138 try: 

139 return _dtype_map[felis_type] 

140 except KeyError: 

141 raise TypeError(f"Unexpected Felis type: {felis_type}") 

142 

143 def _buildSchemas( 

144 self, 

145 schema_file: str, 

146 schema_name: str = "ApdbSchema", 

147 ) -> Mapping[ApdbTables, Table]: 

148 """Create schema definitions for all tables. 

149 

150 Reads YAML schemas and builds dictionary containing `TableDef` 

151 instances for each table. 

152 

153 Parameters 

154 ---------- 

155 schema_file : `str` 

156 Name of YAML file with ``felis`` schema. 

157 schema_name : `str`, optional 

158 Name of the schema in YAML files. 

159 

160 Returns 

161 ------- 

162 schemas : `dict` 

163 Mapping of table names to `TableDef` instances. 

164 """ 

165 

166 schema_file = os.path.expandvars(schema_file) 

167 with open(schema_file) as yaml_stream: 

168 schemas_list = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader)) 

169 schemas_list = [schema for schema in schemas_list if schema.get("name") == schema_name] 

170 if not schemas_list: 

171 raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}") 

172 elif len(schemas_list) > 1: 

173 raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}") 

174 schema_dict = schemas_list[0] 

175 schema_dict.update(DEFAULT_FRAME) 

176 visitor = SimpleVisitor() 

177 schema = visitor.visit_schema(schema_dict) 

178 

179 # convert all dicts into classes 

180 schemas: MutableMapping[ApdbTables, Table] = {} 

181 for table in schema.tables: 

182 try: 

183 table_enum = ApdbTables(table.name) 

184 except ValueError: 

185 # There may be other tables in the schema that do not belong 

186 # to APDB. 

187 continue 

188 else: 

189 schemas[table_enum] = table 

190 

191 return schemas