Coverage for python/felis/check.py: 22%

112 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:48 -0700

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["CheckingVisitor", "FelisValidator"] 

25 

26import logging 

27from collections.abc import Iterable, Mapping, MutableSet 

28from typing import Any 

29 

30from astropy import units as u # type: ignore 

31from astropy.io.votable import ucd # type: ignore 

32 

33from .types import FelisType 

34from .visitor import Visitor 

35 

36_Mapping = Mapping[str, Any] 

37 

38logger = logging.getLogger("felis") 

39 

40 

class FelisValidator:
    """Class defining methods for validating individual objects in a felis
    structure.

    The class implements consistency checks for the types of objects
    (mappings) that can appear in the Felis structure. It also records the
    ``@id`` of every object it checks and logs a warning when an ID is seen
    a second time, hence each check method should be called only once for a
    given object.
    """

    def __init__(self) -> None:
        # IDs of all objects checked so far, used to detect duplicates.
        self._ids: MutableSet[str] = set()

    def check_schema(self, schema_obj: _Mapping) -> None:
        """Validate contents of Felis schema object.

        Parameters
        ----------
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(schema_obj)
        self._check_visited(_id)

    def check_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        """Validate contents of Felis table object.

        Parameters
        ----------
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a table.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(table_obj)
        self._assert_name(table_obj)
        self._check_visited(_id)

    def check_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis column object.

        A missing ``@id``, ``name`` or ``datatype`` raises an exception.
        Problems with the UCD (``ivoa:ucd``) or with the units
        (``fits:tunit`` / ``ivoa:unit``) are only reported through the
        logger and do not interrupt validation.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(column_obj)
        self._assert_name(column_obj)
        datatype_name = self._assert_datatype(column_obj)
        length = column_obj.get("length")
        felis_type = FelisType.felis_type(datatype_name)
        if not length and (felis_type.is_sized or felis_type.is_timestamp):
            # This is not a warning, because it's usually fine
            logger.info(f"No length defined for {_id} for type {datatype_name}")

        # Check UCDs of columns
        ivoa_ucd = column_obj.get("ivoa:ucd")
        if not ucd.check_ucd(ivoa_ucd, check_controlled_vocabulary=True):
            logger.error(f"invalid ucd for {_id}: {ivoa_ucd}")

        # Check Units of columns. There should only be one type of unit, but
        # when both are given each of them is still validated; previously
        # this case left the unit variable unbound and crashed.
        fits_unit = column_obj.get("fits:tunit")
        ivoa_unit = column_obj.get("ivoa:unit")
        units = [unit for unit in (fits_unit, ivoa_unit) if unit]
        if len(units) > 1:
            logger.error("two types of units")

        # Check the unit(s) using astropy; a column without units has
        # nothing to check.
        for unit in units:
            try:
                u.Unit(unit)
            except ValueError as e:
                logger.error(f"invalid unit for {_id} " + str(e))

        self._check_visited(_id)

    def check_primary_key(self, primary_key_obj: str | Iterable[str], table: _Mapping) -> None:
        """Validate contents of Felis primary key object.

        No checks are implemented for primary keys at present, so this
        method accepts any input and never raises.

        Parameters
        ----------
        primary_key_obj : `str` or `Iterable` [ `str` ]
            Felis object representing a primary key.
        table : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.
        """

    def check_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis constraint object.

        Parameters
        ----------
        constraint_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a constraint.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if ``@id`` or ``@type`` is missing, or if ``@type`` is
            not one of ``ForeignKey``, ``Check`` or ``Unique``.
        """
        _id = self._assert_id(constraint_obj)
        constraint_type = constraint_obj.get("@type")
        if not constraint_type:
            raise ValueError(f"Constraint has no @type: {_id}")
        if constraint_type not in ("ForeignKey", "Check", "Unique"):
            raise ValueError(f"Not a valid constraint type: {constraint_type}")
        self._check_visited(_id)

    def check_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis index object.

        Parameters
        ----------
        index_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing an index.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if ``@id`` or ``name`` is missing, or if the index
            defines both ``columns`` and ``expressions``.
        """
        _id = self._assert_id(index_obj)
        self._assert_name(index_obj)
        if "columns" in index_obj and "expressions" in index_obj:
            raise ValueError(f"Defining columns and expressions is not valid for index {_id}")
        self._check_visited(_id)

    def _assert_id(self, obj: _Mapping) -> str:
        """Verify that an object has a non-empty ``@id`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``@id`` field is missing or empty.

        Returns
        -------
        id : `str`
            The value of ``@id`` field.
        """
        _id: str = obj.get("@id", "")
        if not _id:
            # Mention the object's name, when it has one, to help locate it.
            name = obj.get("name", "")
            maybe_string = f"(check object with name: {name})" if name else ""
            raise ValueError(f"No @id defined for object {maybe_string}")
        return _id

    def _assert_name(self, obj: _Mapping) -> None:
        """Verify that an object has a ``name`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``name`` field is missing.
        """
        if "name" not in obj:
            _id = obj.get("@id")
            raise ValueError(f"No name for table object {_id}")

    def _assert_datatype(self, obj: _Mapping) -> str:
        """Verify that an object has a valid ``datatype`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``datatype`` field is missing or invalid.

        Returns
        -------
        datatype : `str`
            The value of ``datatype`` field.
        """
        datatype_name: str = obj.get("datatype", "")
        # The ID is only used in error messages, so a missing ``@id`` must
        # not mask the datatype problem with a KeyError.
        _id = obj.get("@id")
        if not datatype_name:
            raise ValueError(f"No datatype defined for id {_id}")
        try:
            FelisType.felis_type(datatype_name)
        except TypeError:
            raise ValueError(f"Incorrect Type Name for id {_id}: {datatype_name}") from None
        return datatype_name

    def _check_visited(self, _id: str) -> None:
        """Check that given ID has not been visited, generates a warning
        otherwise.

        The ID is recorded as visited in either case.

        Parameters
        ----------
        _id : `str`
            Felis object ID.
        """
        if _id in self._ids:
            logger.warning(f"Duplication of @id {_id}")
        self._ids.add(_id)

282 

283 

class CheckingVisitor(Visitor[None, None, None, None, None, None]):
    """Visitor that walks a felis structure and validates each object.

    Every ``visit_*`` method hands its arguments to the matching ``check_*``
    method of the `FelisValidator` kept in ``self.checker``; exceptions
    raised by the validator propagate to the caller.
    """

    def __init__(self) -> None:
        super().__init__()
        self.checker = FelisValidator()

    def visit_schema(self, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema(schema_obj)
        tables = schema_obj["tables"]
        for table in tables:
            self.visit_table(table, schema_obj)

    def visit_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_table(table_obj, schema_obj)

        # Columns are mandatory; the remaining members are optional.
        columns = table_obj["columns"]
        for column in columns:
            self.visit_column(column, table_obj)

        primary_key = table_obj.get("primaryKey", [])
        self.visit_primary_key(primary_key, table_obj)

        constraints = table_obj.get("constraints", [])
        for constraint in constraints:
            self.visit_constraint(constraint, table_obj)

        indexes = table_obj.get("indexes", [])
        for index in indexes:
            self.visit_index(index, table_obj)

    def visit_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_column(column_obj, table_obj)

    def visit_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_primary_key(primary_key_obj, table_obj)

    def visit_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_constraint(constraint_obj, table_obj)

    def visit_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_index(index_obj, table_obj)