Coverage for python/felis/check.py: 16%
143 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 03:38 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 03:38 -0700
1# This file is part of felis.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["CheckingVisitor", "FelisValidator"]
26import logging
27from collections.abc import Iterable, Mapping, MutableSet
28from typing import Any
30from astropy import units as u # type: ignore
31from astropy.io.votable import ucd # type: ignore
33from .types import FelisType
34from .visitor import Visitor
36_Mapping = Mapping[str, Any]
38logger = logging.getLogger("felis")
class FelisValidator:
    """Class defining methods for validating individual objects in a felis
    structure.

    The class implements consistency checks for the types of objects
    (mappings) that can appear in the Felis structure. It also tracks the
    object IDs (``@id`` field) it has seen and logs a warning when an ID is
    encountered more than once, hence each check method is expected to be
    called only once for a given object.
    """

    def __init__(self) -> None:
        # IDs of all objects that went through `_check_visited` so far.
        self._ids: MutableSet[str] = set()

    def check_schema(self, schema_obj: _Mapping) -> None:
        """Validate contents of Felis schema object.

        Parameters
        ----------
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(schema_obj)
        self._check_visited(_id)

    def check_schema_version(self, version_obj: Any, schema_obj: _Mapping) -> None:
        """Validate contents of schema version object.

        Parameters
        ----------
        version_obj : `Any`
            Felis object (should be string or mapping) representing schema
            version.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        TypeError
            Raised if validation fails for expected types of items.
        ValueError
            Raised if validation fails for the content of the object.
        """
        if isinstance(version_obj, Mapping):
            # "current" is required, other keys are optional.
            possible_keys = {"current", "compatible", "read_compatible"}
            if not possible_keys.issuperset(version_obj):
                # Unknown keys are reported but do not abort validation.
                extra_keys = list(set(version_obj) - possible_keys)
                logger.error(f"unexpected keys in schema version description: {extra_keys}")
            if "current" not in version_obj:
                raise ValueError(f"missing 'current' key in schema version description: {version_obj}")
            if not isinstance(version_obj["current"], str):
                raise TypeError(f"schema version 'current' value is not a string: {version_obj['current']!r}")
            # Both optional keys share the same shape: a list of strings.
            for key in ("compatible", "read_compatible"):
                value = version_obj.get(key)
                if value is None:
                    continue
                if not isinstance(value, list):
                    raise TypeError(f"schema version '{key}' value is not a list: {value!r}")
                for item in value:
                    if not isinstance(item, str):
                        raise TypeError(f"items in '{key}' value are not strings: {value!r}")
        elif not isinstance(version_obj, str):
            raise TypeError(f"schema version description is not a string or object: {version_obj}")

    def check_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        """Validate contents of Felis table object.

        Parameters
        ----------
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a table.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(table_obj)
        self._assert_name(table_obj)
        self._check_visited(_id)

    def check_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis column object.

        Problems with the UCD or the units are logged as errors rather than
        raised.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(column_obj)
        self._assert_name(column_obj)
        datatype_name = self._assert_datatype(column_obj)
        length = column_obj.get("length")
        felis_type = FelisType.felis_type(datatype_name)
        if not length and (felis_type.is_sized or felis_type.is_timestamp):
            # This is not a warning, because it's usually fine
            logger.info(f"No length defined for {_id} for type {datatype_name}")

        # Check UCDs of columns
        ivoa_ucd = column_obj.get("ivoa:ucd")
        if ivoa_ucd:
            try:
                ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
            except ValueError as e:
                logger.error(f"{e} in UCD '{ivoa_ucd}' for '{_id}'")

        # Check Units of columns. There should only be one type of unit;
        # when both are given the conflict is reported and each of them is
        # still validated (previously this case crashed on an unbound local).
        units = self._declared_units(column_obj)
        if len(units) > 1:
            logger.error("two types of units")

        # Check the unit using astropy. With no unit declared the empty
        # string is passed, as before.
        for unit in units or [""]:
            try:
                u.Unit(unit)
            except ValueError as e:
                logger.error(f"invalid unit for {_id} " + str(e))

        self._check_visited(_id)

    def check_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        """Validate contents of Felis primary key object.

        No checks are currently performed.

        Parameters
        ----------
        primary_key_obj : `str` or `Iterable` [ `str` ]
            Felis object representing a primary key.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        pass

    def check_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis constraint object.

        Parameters
        ----------
        constraint_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a constraint.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(constraint_obj)
        constraint_type = constraint_obj.get("@type")
        if not constraint_type:
            raise ValueError(f"Constraint has no @type: {_id}")
        if constraint_type not in ["ForeignKey", "Check", "Unique"]:
            raise ValueError(f"Not a valid constraint type: {constraint_type}")
        self._check_visited(_id)

    def check_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis index object.

        Parameters
        ----------
        index_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing an index.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(index_obj)
        self._assert_name(index_obj)
        # An index is defined either on columns or on expressions, not both.
        if "columns" in index_obj and "expressions" in index_obj:
            raise ValueError(f"Defining columns and expressions is not valid for index {_id}")
        self._check_visited(_id)

    @staticmethod
    def _declared_units(column_obj: _Mapping) -> list[str]:
        """Return the non-empty unit values declared on a column.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.

        Returns
        -------
        units : `list`
            Truthy values of the ``fits:tunit`` and ``ivoa:unit`` fields, in
            that order; empty if neither is set.
        """
        return [unit for key in ("fits:tunit", "ivoa:unit") if (unit := column_obj.get(key))]

    def _assert_id(self, obj: _Mapping) -> str:
        """Verify that an object has a non-empty ``@id`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``@id`` field is missing or empty.

        Returns
        -------
        id : `str`
            The value of ``@id`` field.
        """
        _id: str = obj.get("@id", "")
        if not _id:
            # Mention the object's name, when it has one, to help locate it.
            name = obj.get("name", "")
            maybe_string = f"(check object with name: {name})" if name else ""
            raise ValueError(f"No @id defined for object {maybe_string}")
        return _id

    def _assert_name(self, obj: _Mapping) -> None:
        """Verify that an object has a ``name`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``name`` field is missing.
        """
        if "name" not in obj:
            _id = obj.get("@id")
            raise ValueError(f"No name for table object {_id}")

    def _assert_datatype(self, obj: _Mapping) -> str:
        """Verify that an object has a valid ``datatype`` field.

        The object is expected to have an ``@id`` field already.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``datatype`` field is missing or invalid.

        Returns
        -------
        datatype : `str`
            The value of ``datatype`` field.
        """
        datatype_name: str = obj.get("datatype", "")
        _id = obj["@id"]
        if not datatype_name:
            raise ValueError(f"No datatype defined for id {_id}")
        try:
            FelisType.felis_type(datatype_name)
        except TypeError:
            # Report an unknown type name as a validation error.
            raise ValueError(f"Incorrect Type Name for id {_id}: {datatype_name}") from None
        return datatype_name

    def _check_visited(self, _id: str) -> None:
        """Check that given ID has not been visited, generates a warning
        otherwise.

        The ID is recorded as visited in either case.

        Parameters
        ----------
        _id : `str`
            Felis object ID.
        """
        if _id in self._ids:
            logger.warning(f"Duplication of @id {_id}")
        self._ids.add(_id)
class CheckingVisitor(Visitor[None, None, None, None, None, None, None]):
    """Visitor that walks a felis structure and runs the matching
    `FelisValidator` check on every object it encounters; exceptions raised
    by the checks propagate to the caller.
    """

    def __init__(self) -> None:
        super().__init__()
        # A single validator is shared by the whole traversal.
        self.checker = FelisValidator()

    def visit_schema(self, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema(schema_obj)
        # The version entry is optional; tables are always iterated.
        version = schema_obj.get("version")
        if version is not None:
            self.visit_schema_version(version, schema_obj)
        for table in schema_obj["tables"]:
            self.visit_table(table, schema_obj)

    def visit_schema_version(self, version_obj: str | Mapping[str, Any], schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema_version(version_obj, schema_obj)

    def visit_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_table(table_obj, schema_obj)
        for column in table_obj["columns"]:
            self.visit_column(column, table_obj)
        # Primary key, constraints and indexes are optional table entries.
        primary_key = table_obj.get("primaryKey", [])
        self.visit_primary_key(primary_key, table_obj)
        for constraint in table_obj.get("constraints", []):
            self.visit_constraint(constraint, table_obj)
        for index in table_obj.get("indexes", []):
            self.visit_index(index, table_obj)

    def visit_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_column(column_obj, table_obj)

    def visit_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_primary_key(primary_key_obj, table_obj)

    def visit_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_constraint(constraint_obj, table_obj)

    def visit_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_index(index_obj, table_obj)