Coverage for python/felis/check.py: 16%

143 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 10:20 -0700

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["CheckingVisitor", "FelisValidator"] 

25 

26import logging 

27from collections.abc import Iterable, Mapping, MutableSet 

28from typing import Any 

29 

30from astropy import units as u # type: ignore 

31from astropy.io.votable import ucd # type: ignore 

32 

33from .types import FelisType 

34from .visitor import Visitor 

35 

36_Mapping = Mapping[str, Any] 

37 

38logger = logging.getLogger("felis") 

39 

40 

class FelisValidator:
    """Class defining methods for validating individual objects in a felis
    structure.

    The class implements consistency checks for the types of objects
    (mappings) that can appear in the Felis structure. It also records the
    object ID (``@id`` field) of every checked object so that duplicates can
    be reported, hence each check method should only be called once for a
    given object. A repeated ID is reported as a log warning, not an
    exception.
    """

    def __init__(self) -> None:
        # IDs of all objects checked so far; used to detect duplicated @id.
        self._ids: MutableSet[str] = set()

    def check_schema(self, schema_obj: _Mapping) -> None:
        """Validate contents of Felis schema object.

        Parameters
        ----------
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(schema_obj)
        self._check_visited(_id)

    def check_schema_version(self, version_obj: Any, schema_obj: _Mapping) -> None:
        """Validate contents of schema description object.

        Parameters
        ----------
        version_obj : `Any`
            Felis object (should be string or mapping) representing schema
            version.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        TypeError
            Raised if validation fails for expected types of items.
        ValueError
            Raised if validation fails for the content of the object.
        """
        if isinstance(version_obj, Mapping):
            # "current" is required, other keys are optional.
            possible_keys = {"current", "compatible", "read_compatible"}
            if not possible_keys.issuperset(version_obj):
                # Unknown keys are only reported in the log, they do not
                # abort validation.
                extra_keys = list(set(version_obj) - possible_keys)
                logger.error(f"unexpected keys in schema version description: {extra_keys}")
            if "current" not in version_obj:
                raise ValueError(f"missing 'current' key in schema version description: {version_obj}")
            if not isinstance(version_obj["current"], str):
                raise TypeError(f"schema version 'current' value is not a string: {version_obj['current']!r}")
            # Both optional keys share the same contract: a list of strings.
            for key in ("compatible", "read_compatible"):
                if (value := version_obj.get(key)) is not None:
                    self._assert_string_list(key, value)
        elif not isinstance(version_obj, str):
            raise TypeError(f"schema version description is not a string or object: {version_obj}")

    def check_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        """Validate contents of Felis table object.

        Parameters
        ----------
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a table.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(table_obj)
        self._assert_name(table_obj)
        self._check_visited(_id)

    def check_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis column object.

        Missing ``@id``, ``name`` or ``datatype`` raise an exception; problems
        with the length, UCD or units are only reported through the logger.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(column_obj)
        self._assert_name(column_obj)
        datatype_name = self._assert_datatype(column_obj)
        length = column_obj.get("length")
        felis_type = FelisType.felis_type(datatype_name)
        if not length and (felis_type.is_sized or felis_type.is_timestamp):
            # This is not a warning, because it's usually fine
            logger.info(f"No length defined for {_id} for type {datatype_name}")

        # Check UCDs of columns
        ivoa_ucd = column_obj.get("ivoa:ucd")
        if ivoa_ucd:
            try:
                # NOTE(review): ``has_colon`` is switched on by the presence
                # of ';' in the UCD rather than ':' -- confirm that this is
                # the intended condition.
                ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
            except ValueError as e:
                logger.error(f"{e} in UCD '{ivoa_ucd}' for '{_id}'")

        # Check Units of columns. There should only be one type of unit;
        # if both are given the conflict is reported and every declared
        # unit is still validated (previously this case failed with
        # UnboundLocalError because no unit was selected).
        units = self._declared_units(column_obj)
        if len(units) > 1:
            logger.error("two types of units")

        # Check the units using astropy
        for unit in units:
            try:
                u.Unit(unit)
            except ValueError as e:
                logger.error(f"invalid unit for {_id} " + str(e))

        self._check_visited(_id)

    def check_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        """Validate contents of Felis primary key object.

        No checks are currently implemented for primary keys.

        Parameters
        ----------
        primary_key_obj : `str` or `Iterable` [ `str` ]
            Felis object representing a primary key.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        pass

    def check_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis constraint object.

        Parameters
        ----------
        constraint_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a constraint.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(constraint_obj)
        constraint_type = constraint_obj.get("@type")
        if not constraint_type:
            raise ValueError(f"Constraint has no @type: {_id}")
        if constraint_type not in ["ForeignKey", "Check", "Unique"]:
            raise ValueError(f"Not a valid constraint type: {constraint_type}")
        self._check_visited(_id)

    def check_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis index object.

        Parameters
        ----------
        index_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing an index.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(index_obj)
        self._assert_name(index_obj)
        # An index is defined either by columns or by expressions, not both.
        if "columns" in index_obj and "expressions" in index_obj:
            raise ValueError(f"Defining columns and expressions is not valid for index {_id}")
        self._check_visited(_id)

    @staticmethod
    def _assert_string_list(key: str, value: Any) -> None:
        """Verify that a schema version item is a list of strings.

        Parameters
        ----------
        key : `str`
            Name of the schema version key, used in error messages.
        value : `Any`
            Value stored under that key.

        Raises
        ------
        TypeError
            Raised if ``value`` is not a list or has non-string items.
        """
        if not isinstance(value, list):
            raise TypeError(f"schema version '{key}' value is not a list: {value!r}")
        if not all(isinstance(item, str) for item in value):
            raise TypeError(f"items in '{key}' value are not strings: {value!r}")

    @staticmethod
    def _declared_units(column_obj: _Mapping) -> list[str]:
        """Return the non-empty unit values declared on a column.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.

        Returns
        -------
        units : `list`
            Values of the ``fits:tunit`` and ``ivoa:unit`` fields, in that
            order, skipping fields that are missing or empty.
        """
        return [unit for key in ("fits:tunit", "ivoa:unit") if (unit := column_obj.get(key))]

    def _assert_id(self, obj: _Mapping) -> str:
        """Verify that an object has a non-empty ``@id`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``@id`` field is missing or empty.

        Returns
        -------
        id : `str`
            The value of ``@id`` field.
        """
        _id: str = obj.get("@id", "")
        if not _id:
            # Mention the object name, when there is one, to help locate it.
            name = obj.get("name", "")
            maybe_string = f"(check object with name: {name})" if name else ""
            raise ValueError(f"No @id defined for object {maybe_string}")
        return _id

    def _assert_name(self, obj: _Mapping) -> None:
        """Verify that an object has a ``name`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``name`` field is missing.
        """
        if "name" not in obj:
            _id = obj.get("@id")
            raise ValueError(f"No name for table object {_id}")

    def _assert_datatype(self, obj: _Mapping) -> str:
        """Verify that an object has a valid ``datatype`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``datatype`` field is missing or invalid.

        Returns
        -------
        datatype : `str`
            The value of ``datatype`` field.
        """
        datatype_name: str = obj.get("datatype", "")
        # The ID is only used for error messages, so a missing ``@id`` must
        # not mask the datatype problem with a KeyError.
        _id = obj.get("@id")
        if not datatype_name:
            raise ValueError(f"No datatype defined for id {_id}")
        try:
            FelisType.felis_type(datatype_name)
        except TypeError:
            raise ValueError(f"Incorrect Type Name for id {_id}: {datatype_name}") from None
        return datatype_name

    def _check_visited(self, _id: str) -> None:
        """Check that given ID has not been visited, generates a warning
        otherwise.

        The ID is recorded as visited in either case.

        Parameters
        ----------
        _id : `str`
            Felis object ID.
        """
        if _id in self._ids:
            logger.warning(f"Duplication of @id {_id}")
        self._ids.add(_id)

333 

334 

class CheckingVisitor(Visitor[None, None, None, None, None, None, None]):
    """Visitor that walks a felis structure and validates each object it
    meets by delegating to a `FelisValidator`.

    Errors raised by the validator propagate to the caller as exceptions.
    """

    def __init__(self) -> None:
        super().__init__()
        # A single validator is shared by all visit methods.
        self.checker = FelisValidator()

    def visit_schema(self, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema(schema_obj)
        # The version is optional and only visited when it is present.
        version_obj = schema_obj.get("version")
        if version_obj is not None:
            self.visit_schema_version(version_obj, schema_obj)
        for table_obj in schema_obj["tables"]:
            self.visit_table(table_obj, schema_obj)

    def visit_schema_version(self, version_obj: str | Mapping[str, Any], schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema_version(version_obj, schema_obj)

    def visit_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_table(table_obj, schema_obj)
        for column_obj in table_obj["columns"]:
            self.visit_column(column_obj, table_obj)
        # Primary key, constraints and indexes are optional; a missing
        # entry is treated as an empty list.
        primary_key_obj = table_obj.get("primaryKey", [])
        self.visit_primary_key(primary_key_obj, table_obj)
        constraints = table_obj.get("constraints", [])
        for constraint_obj in constraints:
            self.visit_constraint(constraint_obj, table_obj)
        indexes = table_obj.get("indexes", [])
        for index_obj in indexes:
            self.visit_index(index_obj, table_obj)

    def visit_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_column(column_obj, table_obj)

    def visit_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_primary_key(primary_key_obj, table_obj)

    def visit_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_constraint(constraint_obj, table_obj)

    def visit_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_index(index_obj, table_obj)