Coverage for python/lsst/dax/apdb/apdbSchema.py: 49%

59 statements  

« prev     ^ index     » next       coverage.py v7.3.3, created at 2023-12-20 17:15 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module containing methods and classes for generic APDB schema operations. 

23 

24The code in this module is independent of the specific technology used to 

25implement APDB. 

26""" 

27 

28from __future__ import annotations 

29 

30__all__ = ["ApdbTables", "ApdbSchema"] 

31 

32import enum 

33import logging 

34import os 

35from collections.abc import Mapping, MutableMapping 

36 

37import felis.types 

38import numpy 

39import yaml 

40from felis import DEFAULT_FRAME 

41from felis.simple import SimpleVisitor, Table 

42 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Usually the Cassandra driver determines column types for us. In the cases
# where we have to construct a Pandas DataFrame ourselves, this table is used
# to derive the column dtype from the Felis type given in the YAML schema.
_dtype_map: Mapping[type[felis.types.FelisType], type | str] = {
    # Floating point types.
    felis.types.Double: numpy.float64,
    felis.types.Float: numpy.float32,
    # Timestamps use a numpy datetime dtype string.
    felis.types.Timestamp: "datetime64[ms]",
    # Integer types, from widest to narrowest.
    felis.types.Long: numpy.int64,
    felis.types.Int: numpy.int32,
    felis.types.Short: numpy.int16,
    felis.types.Byte: numpy.int8,
    # Binary and all string-like types map to the generic object dtype.
    felis.types.Binary: object,
    felis.types.Char: object,
    felis.types.Text: object,
    felis.types.String: object,
    felis.types.Unicode: object,
    # Booleans.
    felis.types.Boolean: bool,
}

63 

64 

@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the table names that make up the APDB schema.

    The value of each member is the base (un-prefixed) name of the table.
    """

    DiaObject = "DiaObject"
    """Table holding DIAObject records."""

    DiaSource = "DiaSource"
    """Table holding DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Table holding DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Table holding the last version of DIAObject records.

    This table may be optional for some implementations.
    """

    SSObject = "SSObject"
    """Table holding SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Table holding DiaObject_To_Object_Match records."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name.

        Parameters
        ----------
        prefix : `str`, optional
            String placed in front of the base table name; empty by default.

        Returns
        -------
        name : `str`
            The prefix followed by the value of this enum member.
        """
        base_name: str = self.value
        return prefix + base_name

93 

94 

class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps table identifier (`ApdbTables` member) to the
        `felis.simple.Table` instance describing that table.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ):
        # build complete table schema
        self.tableSchemas = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: type[felis.types.FelisType]) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `type`
            Felis type, one of the classes defined in `felis.types` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if the type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError is an implementation detail of the lookup; drop it
            # from the traceback so that only the TypeError is reported.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def _buildSchemas(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ) -> Mapping[ApdbTables, Table]:
        """Create schema definitions for all tables.

        Reads YAML schemas and builds dictionary containing
        `felis.simple.Table` instances for each APDB table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema. Environment variables in
            the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        schemas : `dict`
            Mapping of `ApdbTables` members to `felis.simple.Table`
            instances. Tables whose names are not members of `ApdbTables`
            are not included.

        Raises
        ------
        ValueError
            Raised if the file does not define a schema with the requested
            name, or defines more than one.
        """
        schema_file = os.path.expandvars(schema_file)
        with open(schema_file) as yaml_stream:
            # ``load_all`` is lazy, it has to be consumed while the file is
            # still open.
            documents = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))

        # A multi-document YAML file can contain empty documents (loaded as
        # `None`) or documents that are not mappings; those cannot be schemas
        # and must be skipped rather than fail on ``.get()``.
        schemas_list = [
            document
            for document in documents
            if isinstance(document, dict) and document.get("name") == schema_name
        ]
        if not schemas_list:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        if len(schemas_list) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")

        schema_dict = schemas_list[0]
        schema_dict.update(DEFAULT_FRAME)
        visitor = SimpleVisitor()
        schema = visitor.visit_schema(schema_dict)

        # convert all dicts into classes
        schemas: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            schemas[table_enum] = table

        return schemas