Coverage for tests / test_datamodel.py: 11%
713 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
# This file is part of felis.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import difflib
23import os
24import pathlib
25import re
26import shutil
27import tempfile
28import unittest
29from collections import defaultdict
31import yaml
32from lsst.resources import ResourcePath
33from pydantic import ValidationError
35from felis.datamodel import (
36 CheckConstraint,
37 Column,
38 ColumnGroup,
39 ColumnOverrides,
40 Constraint,
41 DataType,
42 ForeignKeyConstraint,
43 Index,
44 Schema,
45 SchemaVersion,
46 Table,
47 UniqueConstraint,
48)
50TEST_DIR = os.path.abspath(os.path.dirname(__file__))
51TEST_YAML = os.path.join(TEST_DIR, "data", "test.yml")
52TEST_SALES = os.path.join(TEST_DIR, "data", "sales.yaml")
53TEST_SERIALIZATION = os.path.join(TEST_DIR, "data", "test_serialization.yaml")
54TEST_ID_GENERATION = os.path.join(TEST_DIR, "data", "test_id_generation.yaml")
class ColumnTestCase(unittest.TestCase):
    """Test the ``Column`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Column`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Column()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Column(name="testColumn")

        # Setting name and id should throw an exception from missing datatype.
        with self.assertRaises(ValidationError):
            Column(name="testColumn", id="#test_id")

        # Setting name, id, and datatype should not throw an exception and
        # should load data correctly.
        col = Column(name="testColumn", id="#test_id", datatype="string", length=256)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "testColumn", "id": "#test_id", "datatype": "string", "length": 256}
        col = Column(**data)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Setting a bad IVOA UCD should throw an error.
        with self.assertRaises(ValidationError):
            Column(**data, ivoa_ucd="bad")

        # Setting a valid IVOA UCD should not throw an error.
        col = Column(**data, ivoa_ucd="meta.id")
        self.assertEqual(col.ivoa_ucd, "meta.id", "ivoa_ucd should be 'meta.id'")

        units_data = data.copy()

        # Setting a bad IVOA unit should throw an error.
        units_data["ivoa:unit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid IVOA unit should not throw an error.
        units_data["ivoa:unit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.ivoa_unit, "m", "ivoa_unit should be 'm'")

        units_data = data.copy()

        # Setting a bad FITS TUNIT should throw an error.
        units_data["fits:tunit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid FITS TUNIT should not throw an error.
        units_data["fits:tunit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.fits_tunit, "m", "fits_tunit should be 'm'")

        # Setting both IVOA unit and FITS TUNIT should throw an error.
        units_data["ivoa:unit"] = "m"
        with self.assertRaises(ValidationError):
            Column(**units_data)

    def test_description(self) -> None:
        """Test Pydantic validation of the ``description`` attribute."""
        # Creating a column with a description of 'None' should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": None,
                }
            )

        # Creating a column with an empty description should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "",
                }
            )

        # Creating a column with a description that is too short should throw.
        with self.assertRaises(ValidationError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "xy",
                }
            )

    def test_values(self) -> None:
        """Test Pydantic validation of the ``value`` attribute."""

        # Define a function to return the default column data
        def default_coldata():
            return defaultdict(str, {"name": "testColumn", "@id": "#test_col_id"})

        # Setting both value and autoincrement should throw.
        autoincr_coldata = default_coldata()
        autoincr_coldata["datatype"] = "int"
        autoincr_coldata["autoincrement"] = True
        autoincr_coldata["value"] = 1
        with self.assertRaises(ValueError):
            Column(**autoincr_coldata)

        # Setting an invalid default on a column with an integer type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["int", "long", "short", "byte"]:
            for value in ["bad", "1.0", "1", 1.1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting an invalid default on a column with a decimal type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["double", "float"]:
            for value in ["bad", "1.0", "1", 1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting a bad default on a string column should throw.
        bad_str_coldata = default_coldata()
        bad_str_coldata["length"] = 256
        for datatype in ["string", "char", "unicode", "text"]:
            for value in [1, 1.1, True, "", " ", " ", "\n", "\t"]:
                bad_str_coldata["datatype"] = datatype
                bad_str_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_str_coldata)

        # Setting a non-boolean value on a boolean column should throw.
        # BUGFIX: the loop was previously inside the assertRaises block, so
        # only the first bad value ("bad") was actually exercised; the
        # remaining values were never tested.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        for value in ["bad", 1, 1.1]:
            bool_coldata["value"] = value
            with self.assertRaises(ValueError):
                Column(**bool_coldata)

        # Setting a valid value on a string column should be okay.
        str_coldata = default_coldata()
        str_coldata["length"] = 256
        str_coldata["value"] = "okay"
        for datatype in ["string", "char", "unicode", "text"]:
            str_coldata["datatype"] = datatype
            Column(**str_coldata)

        # Setting an integer value on a column with an int type should be okay.
        int_coldata = default_coldata()
        int_coldata["value"] = 1
        for datatype in ["int", "long", "short", "byte"]:
            int_coldata["datatype"] = datatype
            Column(**int_coldata)

        # Setting a decimal value on a column with a float type should be
        # okay. BUGFIX: the original comment promised this case but the code
        # only tested a boolean column; both cases are now covered.
        float_coldata = default_coldata()
        float_coldata["value"] = 1.5
        for datatype in ["double", "float"]:
            float_coldata["datatype"] = datatype
            Column(**float_coldata)

        # Setting a boolean value on a boolean column should be okay.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        bool_coldata["value"] = True
        Column(**bool_coldata)

    def test_timestamp(self) -> None:
        """Test validation of timestamp columns."""
        # Check that the votable_xtype is set correctly for timestamp columns.
        col = Column(name="testColumn", id="#test_col_id", datatype="timestamp")
        self.assertEqual(col.votable_xtype, "timestamp")
class TableTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Table`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Table`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Table()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable")

        # Setting name and id should throw an exception from missing columns.
        # BUGFIX: this previously instantiated ``Index`` (a copy-paste error),
        # so the ``Table`` missing-columns case was never actually tested.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id")

        testCol = Column(name="testColumn", id="#test_id", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        tbl = Table(name="testTable", id="#test_id", columns=[testCol])
        self.assertEqual(tbl.name, "testTable", "name should be 'testTable'")
        self.assertEqual(tbl.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(tbl.columns, [testCol], "columns should be ['testColumn']")

        # Creating a table with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id", columns=[testCol, testCol])
class ColumnGroupTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``ColumnGroup`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``ColumnGroup`` class."""
        # A bare ColumnGroup with no fields must be rejected.
        with self.assertRaises(ValidationError):
            ColumnGroup()

        # A name alone is not enough to validate.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup")

        # Name plus id still fails because columns are required.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_id")

        member_col = Column(name="testColumn", id="#test_col", datatype="string", length=256)

        # A fully specified group validates and exposes its data.
        col_group = ColumnGroup(name="testGroup", id="#test_group", columns=[member_col], ivoa_ucd="meta")
        self.assertEqual(col_group.name, "testGroup", "name should be 'testGroup'")
        self.assertEqual(col_group.id, "#test_group", "id should be '#test_group'")
        self.assertEqual(col_group.columns, [member_col], "columns should be ['testColumn']")

        # Dereferencing columns before the group is attached to a table is an
        # error.
        with self.assertRaises(ValueError):
            col_group._dereference_columns()

        # Duplicate column names within a single group must be rejected.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_group", columns=[member_col, member_col])

        def _table_with(grp):
            # Build a table holding the member column plus the given group.
            return Table(
                name="testTable",
                id="#test_table",
                columns=[member_col],
                column_groups=[grp],
            )

        # A group may hold the column object directly.
        col_group = ColumnGroup(name="testGroup", id="#test_group", columns=[member_col], ivoa_ucd="meta")
        tbl = _table_with(col_group)
        self.assertEqual(tbl.column_groups, [col_group], "column_groups should be [group]")
        self.assertEqual(member_col, tbl.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # A group may instead reference a column by id, which is dereferenced
        # when the group is assigned to a table.
        col_group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta")
        tbl = _table_with(col_group)
        self.assertEqual(tbl.column_groups, [col_group], "column_groups should be [group]")
        self.assertEqual(member_col, tbl.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # An unknown column reference in a group must be rejected when the
        # table is built.
        col_group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta")
        with self.assertRaises(ValueError):
            _table_with(col_group)
class ConstraintTestCase(unittest.TestCase):
    """Test Pydantic validation of the different constraint classes."""

    def test_base_constraint(self) -> None:
        """Test validation of base constraint type."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint")

        # Setting name and id should not throw an exception and should load
        # data correctly.
        Constraint(name="test_constraint", id="#test_constraint")

        # Setting initially without deferrable should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=False, initially="IMMEDIATE")

        # Setting a bad value for initially should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="BAD_VALUE")

        # Setting a valid value for initially should not throw an exception.
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="IMMEDIATE")
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="DEFERRED")

    def test_unique_constraint(self) -> None:
        """Test validation of unique constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="test_constraint", id="#test_constraint")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"])
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "uniq_test", "id": "#uniq_test", "columns": ["test_column"]}
        constraint = UniqueConstraint(**data)
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

    def test_foreign_key_constraint(self) -> None:
        """Test validation of foreign key constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(name="fk_test", id="#fk_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = ForeignKeyConstraint(
            name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=["test_column"]
        )
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "fk_test",
            "id": "#fk_test",
            "columns": ["test_column"],
            "referenced_columns": ["test_column"],
        }
        constraint = ForeignKeyConstraint(**data)
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating a foreign key constraint with no columns should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=[], referenced_columns=["test_column"]
            )

        # Creating a foreign key constraint with no referenced columns should
        # raise an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=[]
            )

        # Creating a foreign key constraint where the number of foreign key
        # columns does not match the number of referenced columns should raise
        # an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test",
                id="#fk_test",
                columns=["test_column", "test_column2"],
                referenced_columns=["test_column"],
            )

    def test_check_constraint(self) -> None:
        """Test validation of check constraints."""
        # Setting name and id should throw an exception from missing
        # expression.
        with self.assertRaises(ValidationError):
            CheckConstraint(name="check_test", id="#check_test")

        # Setting name, id, and expression should not throw an exception and
        # should load data correctly.
        constraint = CheckConstraint(name="check_test", id="#check_test", expression="1+2")
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "check_test",
            "id": "#check_test",
            "expression": "1+2",
        }
        constraint = CheckConstraint(**data)
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        # BUGFIX: failure message previously said '#test_id' (copy-paste).
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

    def test_bad_constraint_type(self) -> None:
        """Test that an invalid constraint ``type`` value is rejected."""
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"], type="BAD_TYPE")

    def test_constraint_column_checks(self) -> None:
        """Test the extra validation in the ``Schema`` that checks the
        constraint column references.
        """

        def _create_test_schema(constraint: Constraint) -> None:
            """Create a test schema with the given constraint."""
            test_col = Column(name="testColumn", id="#test_col_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col_id2", datatype="int")
            test_tbl = Table(
                name="testTable", id="#test_tbl_id", columns=[test_col, test_col2], constraints=[constraint]
            )
            test_col = Column(name="testColumn", id="#test_col2_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col2_id2", datatype="int")
            test_tbl2 = Table(name="testTable2", id="#test_tbl2_id", columns=[test_col, test_col2])
            Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl, test_tbl2])

        # Creating a unique constraint on a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["bad_column"])
            )

        # Creating a foreign key constraint with a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["bad_column"],
                    referenced_columns=["#test_col2_id"],
                )
            )

        # Creating a foreign key constraint with a bad referenced column should
        # raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["bad_column"],
                )
            )

        # Creating a foreign key constraint where the source column is not in
        # the same table as the constraint should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col2_id"],  # This column is in test_tbl2, not test_tbl
                    referenced_columns=["#test_col_id"],
                )
            )

        # Creating a foreign key constraint where the referenced column is not
        # a column object should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["#test_schema_id"],
                )
            )

        # Creating a valid unique constraint should not raise an exception.
        _create_test_schema(
            UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["#test_col_id"])
        )

        # Creating a valid foreign key constraint should not raise an
        # exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testForeignKey",
                id="#test_fk_id",
                columns=["#test_col_id"],
                referenced_columns=["#test_col2_id"],
            )
        )

        # Creating a foreign key constraint with a composite key should not
        # raise an exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testCompositeForeignKey",
                id="#test_composite_fk_id",
                columns=["#test_col_id", "#test_col_id2"],
                referenced_columns=["#test_col2_id", "#test_col2_id2"],
            )
        )
class IndexTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Index`` class."""

    def test_index_validation(self) -> None:
        """Test validation of indexes."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Index()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        # BUGFIX: the failure messages below previously referenced
        # 'test_constraint', '#test_id', and ['test_column'] (copy-paste from
        # the constraint tests); they now match the asserted values.
        idx = Index(name="idx_test", id="#idx_test", columns=["#test_column"])
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["#test_column"], "columns should be ['#test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "idx_test", "id": "#idx_test", "columns": ["test_column"]}
        idx = Index(**data)
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["test_column"], "columns should be ['test_column']")

        # Setting both columns and expressions on an index should throw an
        # exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test", columns=["test_column"], expressions=["1+2"])
class SchemaTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Schema`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the main schema class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Schema()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema")

        # Setting name and id should throw an exception from missing tables.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id")

        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_tbl_id", columns=[test_col])

        # Setting name, id, and tables should not throw an exception and
        # should load data correctly.
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.name, "testSchema", "name should be 'testSchema'")
        self.assertEqual(sch.id, "#test_sch_id", "id should be '#test_sch_id'")
        self.assertEqual(sch.tables, [test_tbl], "tables should be ['testTable']")

        # Creating a schema with duplicate table names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl, test_tbl])

        # Using an undefined YAML field should raise an exception.
        with self.assertRaises(ValidationError):
            Schema(**{"name": "testSchema", "id": "#test_sch_id", "bad_field": "1234"}, tables=[test_tbl])

        # Creating a schema containing duplicate IDs should raise an error.
        with self.assertRaises(ValidationError):
            Schema(
                name="testSchema",
                id="#test_sch_id",
                tables=[
                    Table(
                        name="testTable",
                        id="#test_tbl_id",
                        columns=[
                            Column(name="testColumn", id="#test_col_id", datatype="string"),
                            Column(name="testColumn2", id="#test_col_id", datatype="string"),
                        ],
                    )
                ],
            )

    def test_schema_object_ids(self) -> None:
        """Test that the ``id_map`` is properly populated."""
        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl])

        # Loop variable renamed from ``id`` to avoid shadowing the builtin.
        for obj_id in ["#test_col_id", "#test_table_id", "#test_schema_id"]:
            # Test that the schema contains the expected id.
            self.assertTrue(obj_id in sch, f"schema should contain '{obj_id}'")

        # Check that types of returned objects are correct.
        self.assertIsInstance(sch["#test_col_id"], Column, "schema[id] should return a Column")
        self.assertIsInstance(sch["#test_table_id"], Table, "schema[id] should return a Table")
        self.assertIsInstance(sch["#test_schema_id"], Schema, "schema[id] should return a Schema")

        with self.assertRaises(KeyError):
            # Test that an invalid id raises an exception.
            sch["#bad_id"]

    def test_check_unique_constraint_names(self) -> None:
        """Test that constraint names are unique."""
        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col])
        test_cons = UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["testColumn"])
        test_cons2 = UniqueConstraint(
            name="testConstraint", id="#test_constraint2_id", columns=["testColumn"]
        )
        test_tbl.constraints = [test_cons, test_cons2]
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl])

    def test_check_unique_index_names(self) -> None:
        """Test that index names are unique."""
        test_col = Column(name="test_column1", id="#test_table.test_column1", datatype="int")
        test_col2 = Column(name="test_column2", id="#test_table.test_column2", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_table", columns=[test_col, test_col2])
        test_idx = Index(name="idx_test", id="#idx_test", columns=[test_col.id])
        test_idx2 = Index(name="idx_test", id="#idx_test2", columns=[test_col2.id])
        test_tbl.indexes = [test_idx, test_idx2]
        with self.assertRaises(ValidationError):
            Schema(name="test_schema", id="#test-schema", tables=[test_tbl])

    def test_model_validate(self) -> None:
        """Load a YAML test file and validate the schema data model."""
        with open(TEST_YAML) as test_yaml:
            data = yaml.safe_load(test_yaml)
            Schema.model_validate(data)

    def test_id_generation(self) -> None:
        """Test ID generation."""
        # The original ``os.path.join(TEST_ID_GENERATION)`` was a no-op.
        test_path = TEST_ID_GENERATION
        with open(test_path) as test_yaml:
            yaml_data = yaml.safe_load(test_yaml)
            # Generate IDs for objects in the test schema.
            Schema.model_validate(yaml_data, context={"id_generation": True})
        with open(test_path) as test_yaml:
            yaml_data = yaml.safe_load(test_yaml)
            # Test that an error is raised when id generation is disabled.
            with self.assertRaises(ValidationError):
                Schema.model_validate(yaml_data, context={"id_generation": False})

    def test_get_table_by_column(self) -> None:
        """Test the ``get_table_by_column`` method."""
        # Test that the correct table is returned when searching by column.
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.get_table_by_column(test_col), test_tbl)

        # Test that an error is raised when the column is not found.
        bad_col = Column(name="bad_column", id="#test_tbl.bad_column", datatype="string", length=256)
        with self.assertRaises(ValueError):
            sch.get_table_by_column(bad_col)

    def test_find_object_by_id(self) -> None:
        """Test the ``find_object_by_id`` method."""
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.find_object_by_id("#test_tbl.test_col", Column), test_col)
        with self.assertRaises(KeyError):
            sch.find_object_by_id("#bad_id", Column)
        with self.assertRaises(TypeError):
            sch.find_object_by_id("#test_tbl", Column)

    def test_from_file(self) -> None:
        """Test loading a schema from a file."""
        # Test file object.
        with open(TEST_SALES) as test_file:
            schema = Schema.from_stream(test_file)
        self.assertIsInstance(schema, Schema)

        # Test path string.
        # BUGFIX: this previously duplicated the file-object test instead of
        # exercising a plain path string.
        schema = Schema.from_uri(TEST_SALES)
        self.assertIsInstance(schema, Schema)

        # Path object.
        test_file_path = pathlib.Path(TEST_SALES)
        schema = Schema.from_uri(test_file_path)
        self.assertIsInstance(schema, Schema)

    def test_from_resource(self) -> None:
        """Test loading a schema from a resource."""
        # Test loading a schema from a resource string.
        schema = Schema.from_uri(
            "resource://felis/config/tap_schema/tap_schema_std.yaml", context={"id_generation": True}
        )
        self.assertIsInstance(schema, Schema)

        # Test loading a schema from a ResourcePath.
        schema = Schema.from_uri(
            ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml"),
            context={"id_generation": True},
        )
        self.assertIsInstance(schema, Schema)

        # Test loading from a nonexistent resource.
        with self.assertRaises(ValueError):
            Schema.from_uri("resource://fake/schemas/bad_schema.yaml")

        # Without ID generation enabled, this schema should fail validation.
        with self.assertRaises(ValidationError):
            Schema.from_uri("resource://felis/config/tap_schema/tap_schema_std.yaml")

    def test_find_table_by_name(self) -> None:
        """Test the ``_find_table_by_name`` method."""
        # Create a simple schema with two tables
        test_col1 = Column(name="test_column1", id="#test_tbl1.test_col1", datatype="int")
        test_col2 = Column(name="test_column2", id="#test_tbl2.test_col2", datatype="string", length=256)
        test_tbl1 = Table(name="test_table1", id="#test_tbl1", columns=[test_col1])
        test_tbl2 = Table(name="test_table2", id="#test_tbl2", columns=[test_col2])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl1, test_tbl2])

        # Test that the correct table is returned when searching by name
        self.assertEqual(sch._find_table_by_name("test_table1"), test_tbl1)
        self.assertEqual(sch._find_table_by_name("test_table2"), test_tbl2)

        # Test that a KeyError is raised when the table is not found
        with self.assertRaises(KeyError):
            sch._find_table_by_name("nonexistent_table")
class SchemaVersionTest(unittest.TestCase):
    """Test the schema version."""

    def test_validation(self) -> None:
        """Test validation of the schema version class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            SchemaVersion()

        # Setting current should not throw an exception and should load data
        # correctly.
        version = SchemaVersion(current="1.0.0")
        self.assertEqual(version.current, "1.0.0", "current should be '1.0.0'")

        # A version may be given as a plain string...
        base = {"name": "schema", "@id": "#schema", "tables": []}
        schema = Schema.model_validate({**base, "version": "1.2.3"})
        self.assertEqual(schema.version, "1.2.3")

        # ...or as a structured object carrying compatibility lists.
        version_obj = {
            "current": "1.2.3",
            "compatible": ["1.2.0", "1.2.1", "1.2.2"],
            "read_compatible": ["1.1.0", "1.1.1"],
        }
        schema = Schema.model_validate({**base, "version": version_obj})
        self.assertEqual(schema.version.current, "1.2.3")
        self.assertEqual(schema.version.compatible, ["1.2.0", "1.2.1", "1.2.2"])
        self.assertEqual(schema.version.read_compatible, ["1.1.0", "1.1.1"])
class ValidationFlagsTest(unittest.TestCase):
    """Test optional validation flags on the schema.

    Each flag is passed through the Pydantic validation ``context`` dict and
    enables an extra, normally-off check on the schema.
    """

    def test_check_tap_table_indexes(self) -> None:
        """Test the ``check_tap_table_indexes`` validation flag."""
        cxt = {"check_tap_table_indexes": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a schema without a TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with a TAP table index should not throw.
        schema_dict["tables"][0]["tap_table_index"] = 1
        # Add a second table that reuses the same index value as the first.
        schema_dict["tables"].append(
            {
                "name": "test_table2",
                "id": "#test_table2",
                "tap_table_index": 1,
                "columns": [{"name": "test_col2", "id": "#test_col2", "datatype": "int"}],
            }
        )

        # Creating a schema with a duplicate TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Multiple, unique TAP table indexes should not throw.
        schema_dict["tables"][1]["tap_table_index"] = 2
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_tap_principal(self) -> None:
        """Test the ``check_tap_principal`` validation flag."""
        cxt = {"check_tap_principal": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a table without a TAP table principal column should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a table with a TAP table principal column should not throw.
        schema_dict["tables"][0]["columns"][0]["tap_principal"] = 1
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_description(self) -> None:
        """Test the ``check_description`` flag."""
        cxt = {"check_description": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a schema without object descriptions should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with object descriptions should not throw.
        # Descriptions are required at every level: schema, table and column.
        schema_dict["description"] = "Test schema"
        schema_dict["tables"][0]["description"] = "Test table"
        schema_dict["tables"][0]["columns"][0]["description"] = "Test column"
        Schema.model_validate(schema_dict, context=cxt)
class RedundantDatatypesTest(unittest.TestCase):
    """Test validation of redundant datatype definitions."""

    def test_mysql_datatypes(self) -> None:
        """Test that redundant datatype definitions raise an error.

        A database-specific datatype that adds no information beyond the
        generic Felis datatype is flagged when ``check_redundant_datatypes``
        is enabled in the validation context.
        """
        # NOTE: in the original code this docstring was a stray string
        # statement placed after the nested class, so it was not attached
        # to the test method at all.

        class ColumnGenerator:
            """Generate column data for redundant datatype testing."""

            def __init__(self, name, id, db_name):
                self.name = name
                self.id = id
                self.db_name = db_name
                self.context = {"check_redundant_datatypes": True}

            def col(self, datatype: str, db_datatype: str, length=None):
                """Build a column with both a Felis and a database datatype."""
                return Column.model_validate(
                    {
                        "name": self.name,
                        "@id": self.id,
                        "datatype": datatype,
                        f"{self.db_name}:datatype": db_datatype,
                        "length": length,
                    },
                    context=self.context,
                )

        coldata = ColumnGenerator("test_col", "#test_col_id", "mysql")

        # Each (felis, mysql, length) triple below is redundant and must
        # fail validation; subTest keeps one failure from masking the rest.
        redundant_cases = [
            ("double", "DOUBLE", None),
            ("int", "INTEGER", None),
            ("float", "FLOAT", None),
            ("char", "CHAR", 8),
            ("string", "VARCHAR", 32),
            ("byte", "TINYINT", None),
            ("short", "SMALLINT", None),
            ("long", "BIGINT", None),
            ("boolean", "BOOLEAN", None),
            ("unicode", "NVARCHAR", 32),
            ("timestamp", "DATETIME", None),
            ("binary", "LONGBLOB", 1024),
            # Same type and length
            ("string", "VARCHAR(128)", 128),
        ]
        for datatype, db_datatype, length in redundant_cases:
            with self.subTest(datatype=datatype, db_datatype=db_datatype, length=length):
                with self.assertRaises(ValidationError):
                    coldata.col(datatype, db_datatype, length=length)

        # DM-42257: Felis does not handle unbounded text types properly.
        # coldata.col("text", "TEXT", length=32)

        # Check the old type mapping for MySQL, which is now okay
        coldata.col("boolean", "BIT(1)")

        # Different types, which is okay
        coldata.col("double", "FLOAT")

        # Same base type with different lengths, which is okay
        coldata.col("string", "VARCHAR(128)", length=32)

        # Different string types, which is okay
        coldata.col("string", "CHAR", length=32)
        coldata.col("unicode", "CHAR", length=32)

    def test_precision(self) -> None:
        """Test that precision is not allowed for datatypes other than
        timestamp.
        """
        with self.assertRaises(ValidationError):
            Column(**{"name": "testColumn", "@id": "#test_col_id", "datatype": "double", "precision": 6})
class SchemaSerializationTest(unittest.TestCase):
    """Test serialization and deserialization of the schema data model."""

    def test_serialization(self) -> None:
        """Test serialization of the schema data model.

        Round-trips a schema through ``model_dump`` and a YAML file and
        asserts the result is semantically identical to the original.
        """
        # Read the original YAML content from the test_serialization.yaml file
        with open(TEST_SERIALIZATION) as file:
            original_yaml_content = file.read()

        # Load the schema from the original YAML content
        schema_out = Schema.from_uri(TEST_SERIALIZATION)
        serialized_data = schema_out.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)

        # Write the serialized data to a temporary YAML file. The original
        # code used delete=False without ever unlinking the file, leaking a
        # temp file on every run; clean it up in a finally block instead.
        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml", mode="w+") as temp_file:
                temp_path = temp_file.name
                yaml.dump(serialized_data, temp_file, default_flow_style=False, sort_keys=False)
                temp_file.seek(0)
                # Read the deserialized YAML content from the temporary file
                deserialized_yaml_content = temp_file.read()
        finally:
            if temp_path is not None:
                os.unlink(temp_path)

        # Show the differences between the original and deserialized YAML
        diff = difflib.unified_diff(
            original_yaml_content.splitlines(keepends=True),
            deserialized_yaml_content.splitlines(keepends=True),
            fromfile="original.yaml",
            tofile="deserialized.yaml",
        )
        print("Differences:\n", "".join(diff))

        # Assert that the original and deserialized YAML are the same
        self.assertEqual(
            yaml.safe_load(original_yaml_content),
            yaml.safe_load(deserialized_yaml_content),
            "The original and deserialized YAML contents should be the same",
        )
class ResourceTestCase(unittest.TestCase):
    """Test loading of column definitions from external schema resources.

    Each test writes small YAML schema files into a temporary directory:
    a "source" schema providing columns and a "referencing" schema pulling
    them in via the ``resources`` / ``columnRefs`` mechanism.
    """

    def setUp(self) -> None:
        """Set up test resources."""
        # Per-test scratch directory, removed in tearDown().
        self.temp_dir = tempfile.mkdtemp()

        # Write out source schema file
        source_schema_content = """
name: source_schema
description: Test resource schema
tables:
- name: source_table
  description: Source table
  columns:
  - name: test_column
    datatype: int
    description: "Test column"
"""
        self.source_schema_path = os.path.join(self.temp_dir, "source_schema.yaml")
        with open(self.source_schema_path, "w") as f:
            f.write(source_schema_content.strip())

        # Write out referencing schema file
        ref_schema_content = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  description: Referencing table
  columnRefs:
    source_schema:
      source_table:
        test_column: null # Explicit null = no overrides, use same name
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            datatype: short
            tap:principal: 1
            tap:column_index: 2
"""
        self.ref_schema_path = os.path.join(self.temp_dir, "ref_schema.yaml")
        # Substitute the absolute path of the source schema into the template.
        ref_content = ref_schema_content.format(resource_path=self.source_schema_path)
        with open(self.ref_schema_path, "w") as f:
            f.write(ref_content.strip())

    def tearDown(self) -> None:
        """Clean up test resources."""
        shutil.rmtree(self.temp_dir)

    def test_schema_resource(self) -> None:
        """Test loading a schema as a resource with column references."""
        # First test that the source schema loads correctly on its own
        source_schema = Schema.from_uri(self.source_schema_path, context={"id_generation": True})
        self.assertEqual(source_schema.name, "source_schema")
        self.assertEqual(len(source_schema.tables), 1)
        self.assertEqual(source_schema.tables[0].name, "source_table")

        # Now test loading the ref schema
        ref_schema = Schema.from_uri(self.ref_schema_path, context={"id_generation": True})
        self.assertEqual(ref_schema.name, "ref_schema")

        # Check that the resource was loaded
        self.assertIn("source_schema", ref_schema._resource_map)

        # Check that the referencing table has the expected columns
        ref_table = ref_schema.tables[0]
        self.assertEqual(ref_table.name, "ref_table")

        # Check the column_refs structure
        column_refs = ref_table.column_refs
        self.assertIsNotNone(column_refs)
        self.assertIsInstance(column_refs, dict)

        # Check the schema resource reference
        self.assertIn("source_schema", column_refs)
        source_schema_refs = column_refs["source_schema"]
        self.assertIsInstance(source_schema_refs, dict)

        # Check the table reference
        self.assertIn("source_table", source_schema_refs)
        source_table_refs = source_schema_refs["source_table"]
        self.assertIsInstance(source_table_refs, dict)

        # Verify the column_refs structure details
        # Should have 2 column references: test_column and renamed_column
        self.assertEqual(len(source_table_refs), 2)

        # Check test_column reference (null/no overrides)
        self.assertIn("test_column", source_table_refs)
        test_column_ref = source_table_refs["test_column"]
        self.assertIsNone(test_column_ref)

        # Check renamed_column reference (with ref_name and overrides)
        self.assertIn("renamed_column", source_table_refs)
        renamed_column_ref = source_table_refs["renamed_column"]
        self.assertIsNotNone(renamed_column_ref)
        self.assertEqual(renamed_column_ref.ref_name, "test_column")
        self.assertIsNotNone(renamed_column_ref.overrides)
        self.assertEqual(renamed_column_ref.overrides.description, "Renamed test column")
        self.assertEqual(renamed_column_ref.overrides.tap_principal, 1)
        self.assertEqual(renamed_column_ref.overrides.tap_column_index, 2)
        self.assertEqual(renamed_column_ref.overrides.datatype.value, "short")

        # Now check structure of dereferenced columns in the ref_table
        self.assertEqual(len(ref_table.columns), 2)

        # Check dereferenced test_column (no overrides)
        test_col = next((col for col in ref_table.columns if col.name == "test_column"), None)
        self.assertIsNotNone(test_col)
        self.assertEqual(test_col.datatype, "int")
        self.assertEqual(test_col.description, "Test column")

        # Check dereferenced renamed_column (includes overrides)
        renamed_col = next((col for col in ref_table.columns if col.name == "renamed_column"), None)
        self.assertIsNotNone(renamed_col)
        self.assertEqual(renamed_col.datatype, "short")  # Inherited from source
        self.assertEqual(renamed_col.description, "Renamed test column")
        self.assertEqual(renamed_col.tap_principal, 1)
        self.assertEqual(renamed_col.tap_column_index, 2)

        # Verify that the columns are present in the ID map
        try:
            ref_schema.find_object_by_id(test_col.id, Column)
        except KeyError:
            self.fail(f"Test column ID '{test_col.id}' not found in schema ID map.")
        try:
            ref_schema.find_object_by_id(renamed_col.id, Column)
        except KeyError:
            self.fail(f"Renamed column ID '{renamed_col.id}' not found in schema ID map.")

    def test_schema_resource_missing_column_error(self) -> None:
        """Test that referencing a non-existent column raises an error."""
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing schema with error
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad reference
  columnRefs:
    source_schema:
      source_table:
        bad_column: null # This column doesn't exist in source
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        # This should raise a ValueError
        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})

        self.assertIn("Column 'bad_column' not found", str(cm.exception))

    def test_schema_resource_missing_ref_name_error(self) -> None:
        """Test that using ref_name for non-existent column raises an error."""
        # Create a ref schema with bad ref_name
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing schema with bad ref_name
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad ref_name
  columnRefs:
    source_schema:
      source_table:
        some_column:
          ref_name: nonexistent_column # This column doesn't exist
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Column 'nonexistent_column' not found", str(cm.exception))

    def test_schema_resource_not_found_error(self) -> None:
        """Test that referencing a non-existent schema resource raises an
        error.
        """
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing non-existent schema resource
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad schema resource reference
  columnRefs:
    nonexistent_schema: # This schema resource doesn't exist
      some_table:
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Schema resource 'nonexistent_schema' was not found in resources", str(cm.exception))

    def test_schema_resource_table_not_found_error(self) -> None:
        """Test that referencing a non-existent table in schema resource raises
        an error.
        """
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing non-existent table in schema resource
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad table reference
  columnRefs:
    source_schema:
      nonexistent_table: # This table doesn't exist in source_schema
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Table 'nonexistent_table' not found in resource 'source_schema'", str(cm.exception))

    def test_schema_resource_bad_uri_error(self) -> None:
        """Test that a bad URI in resource loading raises an error."""
        error_ref_content = """
name: error_ref_schema
description: Test schema with bad resource URI
resources:
  bad_resource:
    uri: /nonexistent/path/to/schema.yaml
tables:
- name: error_table
  description: Table referencing bad resource
  columnRefs:
    bad_resource:
      some_table:
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn(
            "Failed to load resource 'bad_resource' from URI '/nonexistent/path/to/schema.yaml'",
            str(cm.exception),
        )

    def test_ref_schema_with_indexes(self) -> None:
        """Test that indexes are properly handled when loading schema
        resources.
        """
        # Write out referencing schema file
        ref_schema_content_with_indexes = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  description: Referencing table
  columnRefs:
    source_schema:
      source_table:
        test_column: null
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            tap:principal: 1
            tap:column_index: 2
  indexes:
  - name: idx_test_column
    columns:
    - "#ref_table.test_column"
  - name: idx_renamed_column
    columns:
    - "#ref_table.renamed_column"
"""

        source_schema_with_indexes_path = os.path.join(self.temp_dir, "source_schema_with_indexes.yaml")
        ref_content = ref_schema_content_with_indexes.format(resource_path=self.source_schema_path)
        with open(source_schema_with_indexes_path, "w") as f:
            f.write(ref_content.strip())

        ref_schema = Schema.from_uri(source_schema_with_indexes_path, context={"id_generation": True})

        # Check index content; columns are not automatically resolved to
        # objects by the validation.
        indexes = ref_schema.tables[0].indexes
        self.assertEqual(len(indexes), 2)
        self.assertEqual(indexes[0].name, "idx_test_column")
        self.assertEqual(indexes[0].columns, ["#ref_table.test_column"])
        self.assertEqual(indexes[1].name, "idx_renamed_column")
        self.assertEqual(indexes[1].columns, ["#ref_table.renamed_column"])

    def test_ref_schema_with_foreign_key(self) -> None:
        """Test that foreign key constraints are properly handled when loading
        schema resources.
        """
        # Write out referencing schema file
        ref_schema_content_with_foreign_key = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: src_table
  description: Source table for foreign key
  primaryKey: "#src_table.test_column"
  columnRefs:
    source_schema:
      source_table:
        test_column: null
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            tap:principal: 1
            tap:column_index: 2
- name: target_table
  description: Target table for foreign key
  columns:
  - name: fk_column
    datatype: int
    description: "Foreign key column"
  constraints:
  - name: fk_src_table
    '@type': ForeignKey
    columns:
    - "#target_table.fk_column"
    referencedColumns:
    - "#src_table.test_column"
"""

        source_schema_with_foreign_key_path = os.path.join(
            self.temp_dir, "source_schema_with_foreign_key.yaml"
        )
        ref_content = ref_schema_content_with_foreign_key.format(resource_path=self.source_schema_path)
        with open(source_schema_with_foreign_key_path, "w") as f:
            f.write(ref_content.strip())

        ref_schema = Schema.from_uri(source_schema_with_foreign_key_path, context={"id_generation": True})

        # Check foreign key constraint content
        fk_constraint = ref_schema.tables[1].constraints[0]
        self.assertIsInstance(fk_constraint, ForeignKeyConstraint)
        self.assertEqual(fk_constraint.name, "fk_src_table")
        self.assertEqual(fk_constraint.columns, ["#target_table.fk_column"])
        self.assertEqual(fk_constraint.referenced_columns, ["#src_table.test_column"])

    def test_ref_schema_serialization(self) -> None:
        """Test serialization of a reference schema."""
        # Load the referencing schema and then serialize it back to YAML
        ref_schema = Schema.from_uri(self.ref_schema_path, context={"id_generation": True})
        yaml_data = ref_schema.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)
        serialized_schema_path = os.path.join(self.temp_dir, "serialized_ref_schema.yaml")
        with open(serialized_schema_path, "w") as f:
            yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

        # Read back the serialized YAML data
        with open(serialized_schema_path) as f:
            serialized_yaml_data = yaml.safe_load(f)

        # Ensure that columns were not serialized directly in the table
        self.assertEqual(len(serialized_yaml_data["tables"][0]["columns"]), 0)

        # Deserialize the schema and check that the expected columns are
        # present
        deserialized_schema = Schema.from_uri(serialized_schema_path, context={"id_generation": True})
        self.assertEqual(len(deserialized_schema.tables[0].columns), 2)

        # Check that the columnRefs structure is still present
        try:
            ref_columns = deserialized_schema.tables[0].column_refs["source_schema"]["source_table"]
        except Exception:
            self.fail("The column refs are missing after deserialization.")
        self.assertEqual(len(ref_columns), 2)

    def test_ref_schema_with_dereference_columns(self) -> None:
        """Test loading a reference schema with dereferencing of columns so
        that column_refs is set to empty after loading.
        """
        ref_schema = Schema.from_uri(
            self.ref_schema_path, context={"id_generation": True, "dereference_resources": True}
        )

        # Check that the columns were dereferenced into the table
        ref_table = ref_schema.tables[0]
        self.assertEqual(len(ref_table.columns), 2)
        col_names = {col.name for col in ref_table.columns}
        self.assertIn("test_column", col_names)
        self.assertIn("renamed_column", col_names)

        # Check that column_refs is empty after dereferencing
        self.assertEqual(len(ref_table.column_refs), 0)

    def test_tap_column_index_with_overrides(self) -> None:
        """Test that TAP column index is correctly assigned when an override
        of that field is present in the column ref.
        """
        # Write out source schema file
        source_schema_content = """
name: source_schema
tables:
- name: source_table
  columns:
  - name: col1
    datatype: int
  - name: col2
    datatype: int
  - name: col3
    datatype: int
"""
        source_schema_path = os.path.join(self.temp_dir, "source_schema.yaml")
        with open(source_schema_path, "w") as f:
            f.write(source_schema_content.strip())

        # Write out referencing schema file
        ref_schema_content = """
name: ref_schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  columnRefs:
    source_schema:
      source_table:
        col1:
        col2:
          overrides:
            tap:column_index: 15
        col3:
"""
        ref_schema_path = os.path.join(self.temp_dir, "ref_schema.yaml")
        ref_content = ref_schema_content.format(resource_path=source_schema_path)
        with open(ref_schema_path, "w") as f:
            f.write(ref_content.strip())

        ref_schema = Schema.from_uri(
            ref_schema_path,
            context={"id_generation": True, "column_ref_index_increment": 10},
        )

        # Auto-assigned indexes advance by the increment (10, 20, ...); the
        # explicit override on col2 takes precedence over the auto value.
        for column in ref_schema.tables[0].columns:
            if column.name == "col1":
                self.assertEqual(column.tap_column_index, 10)
            elif column.name == "col2":
                self.assertEqual(column.tap_column_index, 15)
            elif column.name == "col3":
                self.assertEqual(column.tap_column_index, 20)
            else:
                self.fail(f"Unexpected column name: {column.name}")
class ColumnOverridesTestCase(unittest.TestCase):
    """Test application of overrides to a column, setting all allowed
    fields.
    """

    def test_all_override_fields_exist_on_column(self) -> None:
        """Ensure every ColumnOverrides field corresponds to an attribute on
        Column.
        """
        override_fields = set(ColumnOverrides.model_fields)
        column_fields = set(Column.model_fields)

        # Any field present on ColumnOverrides but absent from Column would
        # make _update_from_overrides set a nonexistent attribute.
        missing = override_fields - column_fields

        self.assertFalse(
            missing,
            f"Column is missing attributes for override fields: {sorted(missing)}",
        )

    def test_overrides_all(self) -> None:
        """Test updating all allowed column fields from overrides."""
        # Create a base column
        base_column = Column(
            name="base_column",
            id="#base_column",
            description="Base column",
            datatype="char",
            length=64,
            nullable=False,
            tap_principal=1,
            tap_column_index=10,
        )

        # Override all allowed fields with different values
        overrides = ColumnOverrides(
            description="Ref column",
            datatype="string",
            length=256,
            nullable=True,
            tap_principal=0,
            tap_column_index=100,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that the attributes were updated correctly
        self.assertEqual(base_column.description, "Ref column")
        self.assertEqual(base_column.datatype, "string")
        self.assertEqual(base_column.length, 256)
        self.assertEqual(base_column.nullable, True)
        self.assertEqual(base_column.tap_principal, 0)
        self.assertEqual(base_column.tap_column_index, 100)

    def test_overrides_subset(self) -> None:
        """Test updating a subset of allowed column fields from overrides."""
        # Create a base column
        base_column = Column(
            name="base_column",
            id="#base_column",
            description="Base column",
            datatype="char",
            length=64,
            nullable=False,
            tap_principal=1,
            tap_column_index=10,
        )

        # Override only a subset of the allowed fields; the rest must keep
        # their original values.
        overrides = ColumnOverrides(
            description="Ref column",
            tap_column_index=100,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that the attributes were updated correctly
        self.assertEqual(base_column.description, "Ref column")
        self.assertEqual(base_column.datatype, "char")
        self.assertEqual(base_column.length, 64)
        self.assertEqual(base_column.nullable, False)
        self.assertEqual(base_column.tap_principal, 1)
        self.assertEqual(base_column.tap_column_index, 100)

    def test_overrides_default(self) -> None:
        """Test that applying the default overrides is a no-op."""
        # Create a base column
        base_column = Column(
            name="base_column",
            id="#base_column",
            description="Base column",
            datatype="char",
            length=64,
            nullable=False,
            tap_principal=1,
            tap_column_index=10,
        )

        # Apply overrides
        base_column._update_from_overrides(ColumnOverrides())

        # Check that the attributes remain unchanged
        self.assertEqual(base_column.description, "Base column")
        self.assertEqual(base_column.datatype, "char")
        self.assertEqual(base_column.length, 64)
        self.assertEqual(base_column.nullable, False)
        self.assertEqual(base_column.tap_principal, 1)
        self.assertEqual(base_column.tap_column_index, 10)

    def test_overrides_with_explicit_none_values(self) -> None:
        """Test that passing explicit None values in overrides does update
        the column attributes where allowed and raises errors if it is not.
        """
        # Create a base column
        base_column = Column(
            name="base_column",
            id="#base_column",
            description="Base column",
            datatype="int",
            length=64,
            nullable=False,
            tap_principal=1,
            tap_column_index=10,
        )

        # Create overrides with explicit None values for nullable fields
        overrides = ColumnOverrides(
            description=None,
            tap_column_index=None,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that the attributes were updated to None where allowed
        self.assertIsNone(base_column.description)
        self.assertIsNone(base_column.tap_column_index)

        # Check that setting non-nullable fields to None raise a specific
        # ValueError on ColumnOverrides creation
        for non_nullable_field in ("datatype", "length", "nullable", "tap_principal"):
            with self.assertRaisesRegex(
                ValueError,
                re.escape(f"The '{non_nullable_field}' field cannot be overridden to null"),
            ):
                ColumnOverrides(**{non_nullable_field: None})

    def test_extra_fields_in_overrides(self) -> None:
        """Test that extra fields in ColumnOverrides raise a
        ValidationError.
        """
        with self.assertRaises(ValidationError) as cm:
            ColumnOverrides(
                description="Test column",
                extra_field="This should not be allowed",
            )

        self.assertIn("Extra inputs are not permitted", str(cm.exception))

    def test_overrides_accept_alias_keys(self) -> None:
        """Test that alias keys for TAP fields are accepted and populate the
        corresponding model fields.
        """
        overrides = ColumnOverrides(**{"tap:principal": 1, "tap:column_index": 42})

        self.assertEqual(overrides.tap_principal, 1)
        self.assertEqual(overrides.tap_column_index, 42)

        # Ensure these count as explicitly provided (for model_fields_set
        # logic).
        self.assertIn("tap_principal", overrides.model_fields_set)
        self.assertIn("tap_column_index", overrides.model_fields_set)

    def test_datatype_deserialize_and_serialize(self) -> None:
        """Test that datatype is deserialized from a string to DataType and
        serialized back to a string.
        """
        overrides = ColumnOverrides(datatype="char")

        # Deserialization should yield a DataType instance (not a raw str).
        self.assertIsInstance(overrides.datatype, DataType)
        self.assertEqual(str(overrides.datatype), "char")

        # Serialization should produce a JSON-friendly string value.
        dumped = overrides.model_dump(mode="json")
        self.assertEqual(dumped["datatype"], "char")

        # None should remain None on serialization.
        overrides_none = ColumnOverrides()
        dumped_none = overrides_none.model_dump(mode="json")
        self.assertIsNone(dumped_none["datatype"])

    def test_non_nullable_overrides_data_is_none(self) -> None:
        """Test that passing None to ``_check_non_nullable_overrides`` does not
        raise an error.
        """
        ColumnOverrides()._check_non_nullable_overrides(None)
# Allow running this test module directly with `python test_datamodel.py`.
if __name__ == "__main__":
    unittest.main()