Coverage for tests / test_datamodel.py: 12%
463 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
1# This file is part of felis.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import difflib
23import os
24import pathlib
25import tempfile
26import unittest
27from collections import defaultdict
29import yaml
30from lsst.resources import ResourcePath
31from pydantic import ValidationError
33from felis.datamodel import (
34 CheckConstraint,
35 Column,
36 ColumnGroup,
37 Constraint,
38 DataType,
39 ForeignKeyConstraint,
40 Index,
41 Schema,
42 SchemaVersion,
43 Table,
44 UniqueConstraint,
45)
# Absolute path to the directory containing this test module; used to locate
# the YAML schema fixtures under the adjacent ``data`` directory.
TEST_DIR = os.path.abspath(os.path.dirname(__file__))
# Schema fixture files exercised by the test cases below.
TEST_YAML = os.path.join(TEST_DIR, "data", "test.yml")
TEST_SALES = os.path.join(TEST_DIR, "data", "sales.yaml")
TEST_SERIALIZATION = os.path.join(TEST_DIR, "data", "test_serialization.yaml")
TEST_ID_GENERATION = os.path.join(TEST_DIR, "data", "test_id_generation.yaml")
class ColumnTestCase(unittest.TestCase):
    """Test the ``Column`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Column`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Column()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Column(name="testColumn")

        # Setting name and id should throw an exception from missing datatype.
        with self.assertRaises(ValidationError):
            Column(name="testColumn", id="#test_id")

        # Setting name, id, and datatype should not throw an exception and
        # should load data correctly.
        col = Column(name="testColumn", id="#test_id", datatype="string", length=256)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "testColumn", "id": "#test_id", "datatype": "string", "length": 256}
        col = Column(**data)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Setting a bad IVOA UCD should throw an error.
        with self.assertRaises(ValidationError):
            Column(**data, ivoa_ucd="bad")

        # Setting a valid IVOA UCD should not throw an error.
        col = Column(**data, ivoa_ucd="meta.id")
        self.assertEqual(col.ivoa_ucd, "meta.id", "ivoa_ucd should be 'meta.id'")

        units_data = data.copy()

        # Setting a bad IVOA unit should throw an error.
        units_data["ivoa:unit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid IVOA unit should not throw an error.
        units_data["ivoa:unit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.ivoa_unit, "m", "ivoa_unit should be 'm'")

        units_data = data.copy()

        # Setting a bad FITS TUNIT should throw an error.
        units_data["fits:tunit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid FITS TUNIT should not throw an error.
        units_data["fits:tunit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.fits_tunit, "m", "fits_tunit should be 'm'")

        # Setting both IVOA unit and FITS TUNIT should throw an error.
        units_data["ivoa:unit"] = "m"
        with self.assertRaises(ValidationError):
            Column(**units_data)

    def test_description(self) -> None:
        """Test Pydantic validation of the ``description`` attribute."""
        # Creating a column with a description of 'None' should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": None,
                }
            )

        # Creating a column with an empty description should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "",
                }
            )

        # Creating a column with a description that is too short should throw.
        with self.assertRaises(ValidationError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "xy",
                }
            )

    def test_values(self) -> None:
        """Test Pydantic validation of the ``value`` attribute."""

        # Define a function to return the default column data.
        def default_coldata():
            return defaultdict(str, {"name": "testColumn", "@id": "#test_col_id"})

        # Setting both value and autoincrement should throw.
        autoincr_coldata = default_coldata()
        autoincr_coldata["datatype"] = "int"
        autoincr_coldata["autoincrement"] = True
        autoincr_coldata["value"] = 1
        with self.assertRaises(ValueError):
            Column(**autoincr_coldata)

        # Setting an invalid default on a column with an integer type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["int", "long", "short", "byte"]:
            for value in ["bad", "1.0", "1", 1.1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting an invalid default on a column with a decimal type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["double", "float"]:
            for value in ["bad", "1.0", "1", 1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting a bad default on a string column should throw.
        bad_str_coldata = default_coldata()
        bad_str_coldata["length"] = 256
        for datatype in ["string", "char", "unicode", "text"]:
            for value in [1, 1.1, True, "", " ", "  ", "\n", "\t"]:
                bad_str_coldata["datatype"] = datatype
                bad_str_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_str_coldata)

        # Setting a non-boolean value on a boolean column should throw. Each
        # bad value gets its own assertRaises block; previously the loop was
        # nested inside a single assertRaises, so only the first value was
        # ever actually checked.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        for value in ["bad", 1, 1.1]:
            bool_coldata["value"] = value
            with self.assertRaises(ValueError):
                Column(**bool_coldata)

        # Setting a valid value on a string column should be okay.
        str_coldata = default_coldata()
        str_coldata["length"] = 256
        str_coldata["value"] = "okay"
        for datatype in ["string", "char", "unicode", "text"]:
            str_coldata["datatype"] = datatype
            Column(**str_coldata)

        # Setting an integer value on a column with an int type should be okay.
        int_coldata = default_coldata()
        int_coldata["value"] = 1
        for datatype in ["int", "long", "short", "byte"]:
            int_coldata["datatype"] = datatype
            Column(**int_coldata)

        # Setting a decimal value on a column with a float type should be
        # okay. (This case was previously missing: the old comment claimed it
        # was tested but the code below it exercised booleans instead.)
        float_coldata = default_coldata()
        float_coldata["value"] = 1.5
        for datatype in ["double", "float"]:
            float_coldata["datatype"] = datatype
            Column(**float_coldata)

        # Setting a boolean value on a boolean column should be okay.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        bool_coldata["value"] = True
        Column(**bool_coldata)

    def test_timestamp(self) -> None:
        """Test validation of timestamp columns."""
        # Check that the votable_xtype is set correctly for timestamp columns.
        col = Column(name="testColumn", id="#test_col_id", datatype="timestamp")
        self.assertEqual(col.votable_xtype, "timestamp")
class TableTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Table`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Table`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Table()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable")

        # Setting name and id should throw an exception from missing columns.
        # (This previously instantiated ``Index`` by copy-paste mistake, so
        # the missing-columns check on ``Table`` itself was never exercised.)
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id")

        testCol = Column(name="testColumn", id="#test_id", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        tbl = Table(name="testTable", id="#test_id", columns=[testCol])
        self.assertEqual(tbl.name, "testTable", "name should be 'testTable'")
        self.assertEqual(tbl.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(tbl.columns, [testCol], "columns should be ['testColumn']")

        # Creating a table with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id", columns=[testCol, testCol])
class ColumnGroupTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``ColumnGroup`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``ColumnGroup`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_id")

        col = Column(name="testColumn", id="#test_col", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
        self.assertEqual(group.name, "testGroup", "name should be 'testGroup'")
        self.assertEqual(group.id, "#test_group", "id should be '#test_group'")
        self.assertEqual(group.columns, [col], "columns should be ['testColumn']")

        # Dereferencing columns without setting a table should raise an
        # exception.
        with self.assertRaises(ValueError):
            group._dereference_columns()

        # Creating a group with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_group", columns=[col, col])

        # Check that including a column object in a group works correctly.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
        table = Table(
            name="testTable",
            id="#test_table",
            columns=[col],
            column_groups=[group],
        )
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # Check that column dereferencing works correctly when the group
        # refers to its columns by ID string and is assigned to a table.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta")
        table = Table(
            name="testTable",
            id="#test_table",
            columns=[col],
            column_groups=[group],
        )
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # Creating a group with a bad column should raise an exception.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta")
        with self.assertRaises(ValueError):
            table = Table(
                name="testTable",
                id="#test_table",
                columns=[col],
                column_groups=[group],
            )
class ConstraintTestCase(unittest.TestCase):
    """Test Pydantic validation of the different constraint classes."""

    def test_base_constraint(self) -> None:
        """Test validation of base constraint type."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint")

        # Setting name and id should not throw an exception and should load
        # data correctly.
        Constraint(name="test_constraint", id="#test_constraint")

        # Setting initially on a non-deferrable constraint should throw an
        # exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=False, initially="IMMEDIATE")

        # Setting a bad value for initially should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="BAD_VALUE")

        # Setting a valid value for initially should not throw an exception.
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="IMMEDIATE")
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="DEFERRED")

    def test_unique_constraint(self) -> None:
        """Test validation of unique constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="test_constraint", id="#test_constraint")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"])
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "uniq_test", "id": "#uniq_test", "columns": ["test_column"]}
        constraint = UniqueConstraint(**data)
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

    def test_foreign_key_constraint(self) -> None:
        """Test validation of foreign key constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(name="fk_test", id="#fk_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = ForeignKeyConstraint(
            name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=["test_column"]
        )
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "fk_test",
            "id": "#fk_test",
            "columns": ["test_column"],
            "referenced_columns": ["test_column"],
        }
        constraint = ForeignKeyConstraint(**data)
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating a foreign key constraint with no columns should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=[], referenced_columns=["test_column"]
            )

        # Creating a foreign key constraint with no referenced columns should
        # raise an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=[]
            )

        # Creating a foreign key constraint where the number of foreign key
        # columns does not match the number of referenced columns should raise
        # an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test",
                id="#fk_test",
                columns=["test_column", "test_column2"],
                referenced_columns=["test_column"],
            )

    def test_check_constraint(self) -> None:
        """Test validation of check constraints."""
        # Setting name and id should throw an exception from missing
        # expression.
        with self.assertRaises(ValidationError):
            CheckConstraint(name="check_test", id="#check_test")

        # Setting name, id, and expression should not throw an exception and
        # should load data correctly.
        constraint = CheckConstraint(name="check_test", id="#check_test", expression="1+2")
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "check_test",
            "id": "#check_test",
            "expression": "1+2",
        }
        constraint = CheckConstraint(**data)
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

    def test_bad_constraint_type(self) -> None:
        """Test that an invalid constraint type is rejected."""
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"], type="BAD_TYPE")

    def test_constraint_column_checks(self) -> None:
        """Test the extra validation in the ``Schema`` that checks the
        constraint column references.
        """

        def _create_test_schema(constraint: Constraint) -> None:
            """Create a test schema with the given constraint."""
            test_col = Column(name="testColumn", id="#test_col_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col_id2", datatype="int")
            test_tbl = Table(
                name="testTable", id="#test_tbl_id", columns=[test_col, test_col2], constraints=[constraint]
            )
            test_col = Column(name="testColumn", id="#test_col2_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col2_id2", datatype="int")
            test_tbl2 = Table(name="testTable2", id="#test_tbl2_id", columns=[test_col, test_col2])
            Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl, test_tbl2])

        # Creating a unique constraint on a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["bad_column"])
            )

        # Creating a foreign key constraint with a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["bad_column"],
                    referenced_columns=["#test_col2_id"],
                )
            )

        # Creating a foreign key constraint with a bad referenced column should
        # raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["bad_column"],
                )
            )

        # Creating a foreign key constraint where the source column is not in
        # the same table as the constraint should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col2_id"],  # This column is in test_tbl2, not test_tbl
                    referenced_columns=["#test_col_id"],
                )
            )

        # Creating a foreign key constraint where the referenced column is not
        # a column object should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["#test_schema_id"],
                )
            )

        # Creating a valid unique constraint should not raise an exception.
        _create_test_schema(
            UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["#test_col_id"])
        )

        # Creating a valid foreign key constraint should not raise an
        # exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testForeignKey",
                id="#test_fk_id",
                columns=["#test_col_id"],
                referenced_columns=["#test_col2_id"],
            )
        )

        # Creating a foreign key constraint with a composite key should not
        # raise an exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testCompositeForeignKey",
                id="#test_composite_fk_id",
                columns=["#test_col_id", "#test_col_id2"],
                referenced_columns=["#test_col2_id", "#test_col2_id2"],
            )
        )
class IndexTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Index`` class."""

    def test_index_validation(self) -> None:
        """Test validation of indexes."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Index()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly. (Assertion messages below previously
        # referred to the constraint tests they were copied from.)
        idx = Index(name="idx_test", id="#idx_test", columns=["#test_column"])
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["#test_column"], "columns should be ['#test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "idx_test", "id": "#idx_test", "columns": ["test_column"]}
        idx = Index(**data)
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["test_column"], "columns should be ['test_column']")

        # Setting both columns and expressions on an index should throw an
        # exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test", columns=["test_column"], expressions=["1+2"])
class SchemaTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Schema`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the main schema class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Schema()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema")

        # Setting name and id should throw an exception from missing tables.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id")

        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_tbl_id", columns=[test_col])

        # Setting name, id, and tables should not throw an exception and
        # should load data correctly.
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.name, "testSchema", "name should be 'testSchema'")
        self.assertEqual(sch.id, "#test_sch_id", "id should be '#test_sch_id'")
        self.assertEqual(sch.tables, [test_tbl], "tables should be ['testTable']")

        # Creating a schema with duplicate table names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl, test_tbl])

        # Using an undefined YAML field should raise an exception.
        with self.assertRaises(ValidationError):
            Schema(**{"name": "testSchema", "id": "#test_sch_id", "bad_field": "1234"}, tables=[test_tbl])

        # Creating a schema containing duplicate IDs should raise an error.
        with self.assertRaises(ValidationError):
            Schema(
                name="testSchema",
                id="#test_sch_id",
                tables=[
                    Table(
                        name="testTable",
                        id="#test_tbl_id",
                        columns=[
                            Column(name="testColumn", id="#test_col_id", datatype="string"),
                            Column(name="testColumn2", id="#test_col_id", datatype="string"),
                        ],
                    )
                ],
            )

    def test_schema_object_ids(self) -> None:
        """Test that the ``id_map`` is properly populated."""
        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl])

        # Use ``obj_id`` as the loop variable to avoid shadowing the ``id``
        # builtin.
        for obj_id in ["#test_col_id", "#test_table_id", "#test_schema_id"]:
            # Test that the schema contains the expected id.
            self.assertTrue(obj_id in sch, f"schema should contain '{obj_id}'")

        # Check that types of returned objects are correct.
        self.assertIsInstance(sch["#test_col_id"], Column, "schema[id] should return a Column")
        self.assertIsInstance(sch["#test_table_id"], Table, "schema[id] should return a Table")
        self.assertIsInstance(sch["#test_schema_id"], Schema, "schema[id] should return a Schema")

        with self.assertRaises(KeyError):
            # Test that an invalid id raises an exception.
            sch["#bad_id"]

    def test_check_unique_constraint_names(self) -> None:
        """Test that constraint names are unique."""
        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col])
        test_cons = UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["testColumn"])
        test_cons2 = UniqueConstraint(
            name="testConstraint", id="#test_constraint2_id", columns=["testColumn"]
        )
        test_tbl.constraints = [test_cons, test_cons2]
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl])

    def test_check_unique_index_names(self) -> None:
        """Test that index names are unique."""
        test_col = Column(name="test_column1", id="#test_table.test_column1", datatype="int")
        test_col2 = Column(name="test_column2", id="#test_table.test_column2", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_table", columns=[test_col, test_col2])
        test_idx = Index(name="idx_test", id="#idx_test", columns=[test_col.id])
        test_idx2 = Index(name="idx_test", id="#idx_test2", columns=[test_col2.id])
        test_tbl.indexes = [test_idx, test_idx2]
        with self.assertRaises(ValidationError):
            Schema(name="test_schema", id="#test-schema", tables=[test_tbl])

    def test_model_validate(self) -> None:
        """Load a YAML test file and validate the schema data model."""
        with open(TEST_YAML) as test_yaml:
            data = yaml.safe_load(test_yaml)
            Schema.model_validate(data)

    def test_id_generation(self) -> None:
        """Test ID generation."""
        test_path = TEST_ID_GENERATION  # Was a pointless single-arg os.path.join.
        with open(test_path) as test_yaml:
            yaml_data = yaml.safe_load(test_yaml)
            # Generate IDs for objects in the test schema.
            Schema.model_validate(yaml_data, context={"id_generation": True})
        with open(test_path) as test_yaml:
            yaml_data = yaml.safe_load(test_yaml)
            # Test that an error is raised when id generation is disabled.
            with self.assertRaises(ValidationError):
                Schema.model_validate(yaml_data, context={"id_generation": False})

    def test_get_table_by_column(self) -> None:
        """Test the ``get_table_by_column`` method."""
        # Test that the correct table is returned when searching by column.
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.get_table_by_column(test_col), test_tbl)

        # Test that an error is raised when the column is not found.
        bad_col = Column(name="bad_column", id="#test_tbl.bad_column", datatype="string", length=256)
        with self.assertRaises(ValueError):
            sch.get_table_by_column(bad_col)

    def test_find_object_by_id(self) -> None:
        """Test the ``find_object_by_id`` method."""
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.find_object_by_id("#test_tbl.test_col", Column), test_col)
        with self.assertRaises(KeyError):
            sch.find_object_by_id("#bad_id", Column)
        with self.assertRaises(TypeError):
            sch.find_object_by_id("#test_tbl", Column)

    def test_from_file(self) -> None:
        """Test loading a schema from a file."""
        # Test file object.
        with open(TEST_SALES) as test_file:
            schema = Schema.from_stream(test_file)
            self.assertIsInstance(schema, Schema)

        # Test path string. (Previously this duplicated the file-object case
        # and never exercised loading from a plain path string.)
        schema = Schema.from_uri(TEST_SALES)
        self.assertIsInstance(schema, Schema)

        # Path object.
        test_file_path = pathlib.Path(TEST_SALES)
        schema = Schema.from_uri(test_file_path)
        self.assertIsInstance(schema, Schema)

    def test_from_resource(self) -> None:
        """Test loading a schema from a resource."""
        # Test loading a schema from a resource string.
        schema = Schema.from_uri(
            "resource://felis/config/tap_schema/tap_schema_std.yaml", context={"id_generation": True}
        )
        self.assertIsInstance(schema, Schema)

        # Test loading a schema from a ResourcePath.
        schema = Schema.from_uri(
            ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml"),
            context={"id_generation": True},
        )
        self.assertIsInstance(schema, Schema)

        # Test loading from a nonexistent resource.
        with self.assertRaises(ValueError):
            Schema.from_uri("resource://fake/schemas/bad_schema.yaml")

        # Without ID generation enabled, this schema should fail validation.
        with self.assertRaises(ValidationError):
            Schema.from_uri("resource://felis/config/tap_schema/tap_schema_std.yaml")
class SchemaVersionTest(unittest.TestCase):
    """Test the schema version."""

    def test_validation(self) -> None:
        """Test validation of the schema version class."""
        # Constructing without any arguments must be rejected.
        with self.assertRaises(ValidationError):
            SchemaVersion()

        # Providing only ``current`` is sufficient for a valid version.
        version = SchemaVersion(current="1.0.0")
        self.assertEqual(version.current, "1.0.0", "current should be '1.0.0'")

        # A schema version may be given as a single plain string...
        base = {
            "name": "schema",
            "@id": "#schema",
            "tables": [],
        }
        schema = Schema.model_validate({**base, "version": "1.2.3"})
        self.assertEqual(schema.version, "1.2.3")

        # ...or as a structured object with compatibility lists.
        version_info = {
            "current": "1.2.3",
            "compatible": ["1.2.0", "1.2.1", "1.2.2"],
            "read_compatible": ["1.1.0", "1.1.1"],
        }
        schema = Schema.model_validate({**base, "version": version_info})
        self.assertEqual(schema.version.current, "1.2.3")
        self.assertEqual(schema.version.compatible, ["1.2.0", "1.2.1", "1.2.2"])
        self.assertEqual(schema.version.read_compatible, ["1.1.0", "1.1.1"])
class ValidationFlagsTest(unittest.TestCase):
    """Test optional validation flags on the schema."""

    @staticmethod
    def _make_schema_dict() -> dict:
        """Return a fresh, minimal schema dictionary with one table and one
        column, suitable for mutation by the individual tests.

        Each test gets its own copy so mutations do not leak between tests.
        """
        return {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

    def test_check_tap_table_indexes(self) -> None:
        """Test the ``check_tap_table_indexes`` validation flag."""
        cxt = {"check_tap_table_indexes": True}
        schema_dict = self._make_schema_dict()

        # Creating a schema without a TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with a TAP table index should not throw.
        schema_dict["tables"][0]["tap_table_index"] = 1
        Schema.model_validate(schema_dict, context=cxt)
        schema_dict["tables"].append(
            {
                "name": "test_table2",
                "id": "#test_table2",
                "tap_table_index": 1,
                "columns": [{"name": "test_col2", "id": "#test_col2", "datatype": "int"}],
            }
        )

        # Creating a schema with a duplicate TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Multiple, unique TAP table indexes should not throw.
        schema_dict["tables"][1]["tap_table_index"] = 2
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_tap_principal(self) -> None:
        """Test the ``check_tap_principal`` validation flag."""
        cxt = {"check_tap_principal": True}
        schema_dict = self._make_schema_dict()

        # Creating a table without a TAP table principal column should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a table with a TAP table principal column should not throw.
        schema_dict["tables"][0]["columns"][0]["tap_principal"] = 1
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_description(self) -> None:
        """Test the ``check_description`` flag."""
        cxt = {"check_description": True}
        schema_dict = self._make_schema_dict()

        # Creating a schema without object descriptions should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with object descriptions should not throw.
        schema_dict["description"] = "Test schema"
        schema_dict["tables"][0]["description"] = "Test table"
        schema_dict["tables"][0]["columns"][0]["description"] = "Test column"
        Schema.model_validate(schema_dict, context=cxt)
class RedundantDatatypesTest(unittest.TestCase):
    """Test validation of redundant datatype definitions."""

    def test_mysql_datatypes(self) -> None:
        """Test that redundant datatype definitions raise an error.

        A datatype override like ``mysql:datatype`` is redundant when it maps
        to the same database type (and length) that the generic ``datatype``
        would already produce.
        """

        class ColumnGenerator:
            """Generate column data for redundant datatype testing."""

            def __init__(self, name: str, id: str, db_name: str) -> None:
                self.name = name
                self.id = id
                self.db_name = db_name
                # Enable the redundancy check for every generated column.
                self.context = {"check_redundant_datatypes": True}

            def col(self, datatype: str, db_datatype: str, length=None) -> Column:
                """Validate a column with both a generic datatype and a
                database-specific override.
                """
                return Column.model_validate(
                    {
                        "name": self.name,
                        "@id": self.id,
                        "datatype": datatype,
                        f"{self.db_name}:datatype": db_datatype,
                        "length": length,
                    },
                    context=self.context,
                )

        coldata = ColumnGenerator("test_col", "#test_col_id", "mysql")

        # Each pairing below is a redundant override and must be rejected.
        with self.assertRaises(ValidationError):
            coldata.col("double", "DOUBLE")

        with self.assertRaises(ValidationError):
            coldata.col("int", "INTEGER")

        with self.assertRaises(ValidationError):
            coldata.col("float", "FLOAT")

        with self.assertRaises(ValidationError):
            coldata.col("char", "CHAR", length=8)

        with self.assertRaises(ValidationError):
            coldata.col("string", "VARCHAR", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("byte", "TINYINT")

        with self.assertRaises(ValidationError):
            coldata.col("short", "SMALLINT")

        with self.assertRaises(ValidationError):
            coldata.col("long", "BIGINT")

        with self.assertRaises(ValidationError):
            coldata.col("boolean", "BOOLEAN")

        with self.assertRaises(ValidationError):
            coldata.col("unicode", "NVARCHAR", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("timestamp", "DATETIME")

        # DM-42257: Felis does not handle unbounded text types properly.
        # coldata.col("text", "TEXT", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("binary", "LONGBLOB", length=1024)

        with self.assertRaises(ValidationError):
            # Same type and length
            coldata.col("string", "VARCHAR(128)", length=128)

        # Check the old type mapping for MySQL, which is now okay
        coldata.col("boolean", "BIT(1)")

        # Different types, which is okay
        coldata.col("double", "FLOAT")

        # Same base type with different lengths, which is okay
        coldata.col("string", "VARCHAR(128)", length=32)

        # Different string types, which is okay
        coldata.col("string", "CHAR", length=32)
        coldata.col("unicode", "CHAR", length=32)

    def test_precision(self) -> None:
        """Test that precision is not allowed for datatypes other than
        timestamp.
        """
        with self.assertRaises(ValidationError):
            Column(**{"name": "testColumn", "@id": "#test_col_id", "datatype": "double", "precision": 6})
class SchemaSerializationTest(unittest.TestCase):
    """Test serialization and deserialization of the schema data model."""

    def test_serialization(self) -> None:
        """Test that a schema round-trips through YAML serialization.

        Loads ``test_serialization.yaml``, dumps the model back to YAML, and
        asserts the parsed content is identical to the original file.
        """
        # Read the original YAML content from the test_serialization.yaml file
        with open(TEST_SERIALIZATION) as file:
            original_yaml_content = file.read()

        # Load the schema and dump it back to plain data; unset and default
        # fields are excluded so the output mirrors the hand-written YAML.
        schema_out = Schema.from_uri(TEST_SERIALIZATION)
        serialized_data = schema_out.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)

        # Write the serialized data to a temporary YAML file and read it
        # back. delete=False is needed for write-then-reread on all
        # platforms, so the file must be removed explicitly afterwards
        # (the original version leaked it on disk).
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml", mode="w+")
        try:
            with temp_file:
                yaml.dump(serialized_data, temp_file, default_flow_style=False, sort_keys=False)
                temp_file.seek(0)
                # Read the deserialized YAML content from the temporary file
                deserialized_yaml_content = temp_file.read()
        finally:
            os.unlink(temp_file.name)

        # Show the differences between the original and deserialized YAML
        diff = difflib.unified_diff(
            original_yaml_content.splitlines(keepends=True),
            deserialized_yaml_content.splitlines(keepends=True),
            fromfile="original.yaml",
            tofile="deserialized.yaml",
        )
        print("Differences:\n", "".join(diff))

        # Assert that the original and deserialized YAML are the same
        self.assertEqual(
            yaml.safe_load(original_yaml_content),
            yaml.safe_load(deserialized_yaml_content),
            "The original and deserialized YAML contents should be the same",
        )
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()