Coverage for python/lsst/dax/apdb/apdbSchema.py: 48%

67 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-26 09:55 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module containing methods and classes for generic APDB schema operations. 

23 

24The code in this module is independent of the specific technology used to 

25implement APDB. 

26""" 

27 

28from __future__ import annotations 

29 

30__all__ = ["ApdbTables", "ApdbSchema"] 

31 

32import enum 

33import logging 

34import os 

35from collections.abc import Mapping, MutableMapping 

36 

37import felis.datamodel 

38import numpy 

39import yaml 

40 

41from .schema_model import ExtraDataTypes, Schema, Table 

42from .versionTuple import VersionTuple 

43 

44_LOG = logging.getLogger(__name__) 

45 

# Lookup table from column types declared in the YAML schema to the dtypes
# used when a Pandas DataFrame has to be constructed by hand. Usually the
# Cassandra driver determines column types and this table is not consulted.
_dtype_map: Mapping[felis.datamodel.DataType | ExtraDataTypes, type | str] = {
    # Floating point types.
    felis.datamodel.DataType.double: numpy.float64,
    felis.datamodel.DataType.float: numpy.float32,
    # Timestamps are given as a dtype string rather than a type object.
    felis.datamodel.DataType.timestamp: "datetime64[ms]",
    # Integer types.
    felis.datamodel.DataType.long: numpy.int64,
    felis.datamodel.DataType.int: numpy.int32,
    felis.datamodel.DataType.short: numpy.int16,
    felis.datamodel.DataType.byte: numpy.int8,
    # Binary and all string-like types are kept as generic Python objects.
    **dict.fromkeys(
        (
            felis.datamodel.DataType.binary,
            felis.datamodel.DataType.char,
            felis.datamodel.DataType.text,
            felis.datamodel.DataType.string,
            felis.datamodel.DataType.unicode,
        ),
        object,
    ),
    felis.datamodel.DataType.boolean: bool,
}

64 

65 

@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the tables that make up APDB schema.

    The value of each member is the table name without any prefix.
    """

    DiaObject = "DiaObject"
    """Table holding DIAObject records."""

    DiaSource = "DiaSource"
    """Table holding DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Table holding DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Table holding the last version of DIAObject records.

    Some implementations may treat this table as optional.
    """

    SSObject = "SSObject"
    """Table holding SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Table holding DiaObject_To_Object_Match records."""

    metadata = "metadata"
    """Metadata table, this table may not always exist."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name.

        Parameters
        ----------
        prefix : `str`, optional
            String placed in front of the table name, empty by default.

        Returns
        -------
        name : `str`
            ``prefix`` immediately followed by the value of this enum
            member.
        """
        full_name = prefix + self.value
        return full_name

97 

98 

class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps `ApdbTables` enum values to `.schema_model.Table` instances.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ):
        # Build complete table schema, the version is `None` when the schema
        # file does not define it.
        self.tableSchemas, self._schemaVersion = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: felis.datamodel.DataType | ExtraDataTypes) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `felis.datamodel.DataType`
            Felis type, one of the enums defined in `felis.datamodel` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError from the internal lookup table carries no useful
            # information for the caller, so it is not chained.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def schemaVersion(self) -> VersionTuple:
        """Return schema version as defined in YAML schema file.

        Returns
        -------
        version : `VersionTuple`
            Version number read from YAML file, if YAML file does not define
            schema version then "0.1.0" is returned.
        """
        if self._schemaVersion is None:
            return VersionTuple(0, 1, 0)
        return self._schemaVersion

    @classmethod
    def _buildSchemas(
        cls, schema_file: str, schema_name: str = "ApdbSchema"
    ) -> tuple[Mapping[ApdbTables, Table], VersionTuple | None]:
        """Create schema definitions for all tables.

        Reads YAML schema and builds a dictionary containing
        `.schema_model.Table` instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema, environment variables in
            the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        tables : `dict`
            Mapping of `ApdbTables` enum values to `.schema_model.Table`
            instances.
        version : `VersionTuple` or `None`
            Schema version defined in schema file, `None` if version is not
            defined.

        Raises
        ------
        ValueError
            Raised if the file does not contain exactly one schema with the
            requested name.
        """
        schema_file = os.path.expandvars(schema_file)
        # Use explicit encoding so that parsing does not depend on the locale.
        with open(schema_file, encoding="utf-8") as yaml_stream:
            # load_all() is lazy, so it must be consumed while the file is
            # still open. Empty or non-mapping documents (e.g. produced by a
            # stray "---" separator) cannot define a schema and are skipped
            # instead of failing on a missing ``get`` method.
            schemas_list = [
                document
                for document in yaml.load_all(yaml_stream, Loader=yaml.SafeLoader)
                if isinstance(document, Mapping) and document.get("name") == schema_name
            ]

        if not schemas_list:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        if len(schemas_list) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")
        felis_schema = felis.datamodel.Schema.model_validate(schemas_list[0])
        schema = Schema.from_felis(felis_schema)

        # Keep only the tables known to APDB, keyed by their enum value.
        tables: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            tables[table_enum] = table

        version: VersionTuple | None = None
        if schema.version is not None:
            version = VersionTuple.fromString(schema.version.current)

        return tables, version