Coverage for python/lsst/dax/apdb/apdbSchema.py: 48%

70 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-20 11:36 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module containing methods and classes for generic APDB schema operations. 

23 

24The code in this module is independent of the specific technology used to 

25implement APDB. 

26""" 

27 

28from __future__ import annotations 

29 

30__all__ = ["ApdbTables", "ApdbSchema"] 

31 

32import enum 

33import logging 

34import os 

35from collections.abc import Mapping, MutableMapping 

36 

37import felis.types 

38import numpy 

39import yaml 

40from felis import DEFAULT_FRAME 

41from felis.simple import SimpleVisitor, Table 

42 

43from .versionTuple import VersionTuple 

44 

_LOG = logging.getLogger(__name__)

# Column types are normally determined by the Cassandra driver. In the cases
# where we have to construct a Pandas DataFrame ourselves, this map is used
# to infer the column types from their YAML (Felis) schema types.
_dtype_map: Mapping[type[felis.types.FelisType], type | str] = {
    felis.types.Double: numpy.float64,
    felis.types.Float: numpy.float32,
    felis.types.Timestamp: "datetime64[ms]",
    felis.types.Long: numpy.int64,
    felis.types.Int: numpy.int32,
    felis.types.Short: numpy.int16,
    felis.types.Byte: numpy.int8,
    # Binary and all string-like Felis types are mapped to generic `object`.
    **dict.fromkeys(
        (
            felis.types.Binary,
            felis.types.Char,
            felis.types.Text,
            felis.types.String,
            felis.types.Unicode,
        ),
        object,
    ),
    felis.types.Boolean: bool,
}

65 

66 

@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the tables that make up the APDB schema.

    The value of each member is the un-prefixed name of the table.
    """

    DiaObject = "DiaObject"
    """Name of the table for DIAObject records."""

    DiaSource = "DiaSource"
    """Name of the table for DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Name of the table for DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Name of the table for the last version of DIAObject records.

    This table may be optional for some implementations.
    """

    SSObject = "SSObject"
    """Name of the table for SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Name of the table for DiaObject_To_Object_Match records."""

    metadata = "metadata"
    """Name of the metadata table, this table may not always exist."""

    def table_name(self, prefix: str = "") -> str:
        """Return the full table name, i.e. ``prefix`` followed by the
        member value.

        Parameters
        ----------
        prefix : `str`, optional
            String placed in front of the table name, empty by default.

        Returns
        -------
        name : `str`
            Concatenation of ``prefix`` and the value of this member.
        """
        full_name = prefix + self.value
        return full_name

98 

99 

class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps table identifier (`ApdbTables` member) to the
        `felis.simple.Table` instance describing that table.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ):
        # Build complete table schema; the version is `None` when the YAML
        # file does not define one (see `schemaVersion`).
        self.tableSchemas, self._schemaVersion = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: type[felis.types.FelisType]) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `type`
            Felis type, one of the classes defined in `felis.types` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if the type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError is an implementation detail of the lookup, do not
            # chain it into the traceback seen by the caller.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def schemaVersion(self) -> VersionTuple:
        """Return schema version as defined in YAML schema file.

        Returns
        -------
        version : `VersionTuple`
            Version number read from YAML file, if YAML file does not define
            schema version then "0.1.0" is returned.
        """
        if self._schemaVersion is None:
            return VersionTuple(0, 1, 0)
        return self._schemaVersion

    @classmethod
    def _buildSchemas(
        cls, schema_file: str, schema_name: str = "ApdbSchema"
    ) -> tuple[Mapping[ApdbTables, Table], VersionTuple | None]:
        """Create schema definitions for all tables.

        Reads YAML schema and builds a dictionary containing
        `felis.simple.Table` instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema. Environment variables
            in the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        tables : `dict`
            Mapping of `ApdbTables` members to `felis.simple.Table`
            instances.
        version : `VersionTuple` or `None`
            Schema version defined in schema file, `None` if version is not
            defined.

        Raises
        ------
        ValueError
            Raised if the file does not define a schema named
            ``schema_name`` or defines more than one such schema.
        """
        schema_file = os.path.expandvars(schema_file)
        # Read with an explicit encoding so that parsing does not depend on
        # the locale of the process.
        with open(schema_file, encoding="utf-8") as yaml_stream:
            documents = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))

        # A YAML stream may contain empty documents (loaded as `None`) or
        # documents that are not mappings; those cannot define a schema and
        # are skipped instead of failing on a missing ``get`` method.
        schemas_list = [
            document
            for document in documents
            if isinstance(document, Mapping) and document.get("name") == schema_name
        ]
        if not schemas_list:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        elif len(schemas_list) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")
        schema_dict = schemas_list[0]
        # Merge the Felis default frame entries into the schema document
        # before it is handed to the visitor.
        schema_dict.update(DEFAULT_FRAME)
        visitor = SimpleVisitor()
        schema = visitor.visit_schema(schema_dict)

        # convert all dicts into classes
        tables: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            else:
                tables[table_enum] = table

        version: VersionTuple | None = None
        if schema.version is not None:
            version = VersionTuple.fromString(schema.version.current)

        return tables, version