Coverage for tests / test_datamodel.py: 11%

713 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-17 08:49 +0000

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import difflib 

23import os 

24import pathlib 

25import re 

26import shutil 

27import tempfile 

28import unittest 

29from collections import defaultdict 

30 

31import yaml 

32from lsst.resources import ResourcePath 

33from pydantic import ValidationError 

34 

35from felis.datamodel import ( 

36 CheckConstraint, 

37 Column, 

38 ColumnGroup, 

39 ColumnOverrides, 

40 Constraint, 

41 DataType, 

42 ForeignKeyConstraint, 

43 Index, 

44 Schema, 

45 SchemaVersion, 

46 Table, 

47 UniqueConstraint, 

48) 

49 

50TEST_DIR = os.path.abspath(os.path.dirname(__file__)) 

51TEST_YAML = os.path.join(TEST_DIR, "data", "test.yml") 

52TEST_SALES = os.path.join(TEST_DIR, "data", "sales.yaml") 

53TEST_SERIALIZATION = os.path.join(TEST_DIR, "data", "test_serialization.yaml") 

54TEST_ID_GENERATION = os.path.join(TEST_DIR, "data", "test_id_generation.yaml") 

55 

56 

class ColumnTestCase(unittest.TestCase):
    """Test the ``Column`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Column`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Column()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Column(name="testColumn")

        # Setting name and id should throw an exception from missing datatype.
        with self.assertRaises(ValidationError):
            Column(name="testColumn", id="#test_id")

        # Setting name, id, and datatype should not throw an exception and
        # should load data correctly.
        col = Column(name="testColumn", id="#test_id", datatype="string", length=256)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "testColumn", "id": "#test_id", "datatype": "string", "length": 256}
        col = Column(**data)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Setting a bad IVOA UCD should throw an error.
        with self.assertRaises(ValidationError):
            Column(**data, ivoa_ucd="bad")

        # Setting a valid IVOA UCD should not throw an error.
        col = Column(**data, ivoa_ucd="meta.id")
        self.assertEqual(col.ivoa_ucd, "meta.id", "ivoa_ucd should be 'meta.id'")

        units_data = data.copy()

        # Setting a bad IVOA unit should throw an error.
        units_data["ivoa:unit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid IVOA unit should not throw an error.
        units_data["ivoa:unit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.ivoa_unit, "m", "ivoa_unit should be 'm'")

        units_data = data.copy()

        # Setting a bad FITS TUNIT should throw an error.
        units_data["fits:tunit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid FITS TUNIT should not throw an error.
        units_data["fits:tunit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.fits_tunit, "m", "fits_tunit should be 'm'")

        # Setting both IVOA unit and FITS TUNIT should throw an error.
        units_data["ivoa:unit"] = "m"
        with self.assertRaises(ValidationError):
            Column(**units_data)

    def test_description(self) -> None:
        """Test Pydantic validation of the ``description`` attribute."""
        # Creating a column with a description of 'None' should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": None,
                }
            )

        # Creating a column with an empty description should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "",
                }
            )

        # Creating a column with a description that is too short should throw.
        with self.assertRaises(ValidationError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "xy",
                }
            )

    def test_values(self) -> None:
        """Test Pydantic validation of the ``value`` attribute."""

        # Define a function to return the default column data
        def default_coldata():
            return defaultdict(str, {"name": "testColumn", "@id": "#test_col_id"})

        # Setting both value and autoincrement should throw.
        autoincr_coldata = default_coldata()
        autoincr_coldata["datatype"] = "int"
        autoincr_coldata["autoincrement"] = True
        autoincr_coldata["value"] = 1
        with self.assertRaises(ValueError):
            Column(**autoincr_coldata)

        # Setting an invalid default on a column with an integer type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["int", "long", "short", "byte"]:
            for value in ["bad", "1.0", "1", 1.1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting an invalid default on a column with a decimal type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["double", "float"]:
            for value in ["bad", "1.0", "1", 1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting a bad default on a string column should throw.
        bad_str_coldata = default_coldata()
        bad_str_coldata["length"] = 256
        for datatype in ["string", "char", "unicode", "text"]:
            for value in [1, 1.1, True, "", " ", "  ", "\n", "\t"]:
                bad_str_coldata["datatype"] = datatype
                bad_str_coldata["value"] = value
                with self.assertRaises(ValueError):
                    Column(**bad_str_coldata)

        # Setting a non-boolean value on a boolean column should throw.
        # The assertRaises must wrap each individual value; with the loop
        # nested inside a single assertRaises block, only the first bad value
        # would ever be checked because the first raise exits the block.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        for value in ["bad", 1, 1.1]:
            bool_coldata["value"] = value
            with self.assertRaises(ValueError):
                Column(**bool_coldata)

        # Setting a valid value on a string column should be okay.
        str_coldata = default_coldata()
        str_coldata["length"] = 256
        str_coldata["value"] = "okay"
        for datatype in ["string", "char", "unicode", "text"]:
            str_coldata["datatype"] = datatype
            Column(**str_coldata)

        # Setting an integer value on a column with an int type should be okay.
        int_coldata = default_coldata()
        int_coldata["value"] = 1
        for datatype in ["int", "long", "short", "byte"]:
            int_coldata["datatype"] = datatype
            Column(**int_coldata)

        # Setting a decimal value on a column with a floating point type
        # should be okay. (Previously this comment labeled a boolean test and
        # the float case was never covered.)
        float_coldata = default_coldata()
        float_coldata["value"] = 1.5
        for datatype in ["double", "float"]:
            float_coldata["datatype"] = datatype
            Column(**float_coldata)

        # Setting a boolean value on a boolean column should be okay.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        bool_coldata["value"] = True
        Column(**bool_coldata)

    def test_timestamp(self) -> None:
        """Test validation of timestamp columns."""
        # Check that the votable_xtype is set correctly for timestamp columns.
        col = Column(name="testColumn", id="#test_col_id", datatype="timestamp")
        self.assertEqual(col.votable_xtype, "timestamp")

242 

243 

class TableTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Table`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Table`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Table()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable")

        # Setting name and id should throw an exception from missing columns.
        # Fixed copy-paste error: this previously instantiated ``Index``, so
        # the missing-columns case on ``Table`` was never actually exercised.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id")

        testCol = Column(name="testColumn", id="#test_id", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        tbl = Table(name="testTable", id="#test_id", columns=[testCol])
        self.assertEqual(tbl.name, "testTable", "name should be 'testTable'")
        self.assertEqual(tbl.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(tbl.columns, [testCol], "columns should be ['testColumn']")

        # Creating a table with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id", columns=[testCol, testCol])

274 

275 

class ColumnGroupTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``ColumnGroup`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``ColumnGroup`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_id")

        col = Column(name="testColumn", id="#test_col", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
        self.assertEqual(group.name, "testGroup", "name should be 'testGroup'")
        self.assertEqual(group.id, "#test_group", "id should be '#test_group'")
        self.assertEqual(group.columns, [col], "columns should be ['testColumn']")

        # Dereferencing columns without setting a table should raise an
        # exception.
        with self.assertRaises(ValueError):
            group._dereference_columns()

        # Creating a group with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_group", columns=[col, col])

        # Check that including a column object in a group works correctly.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
        table = Table(
            name="testTable",
            id="#test_table",
            columns=[col],
            column_groups=[group],
        )
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # Check that column dereferencing works correctly when the group is
        # assigned to a table, with the column referenced by its ID string.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta")
        table = Table(
            name="testTable",
            id="#test_table",
            columns=[col],
            column_groups=[group],
        )
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # Creating a group with a bad column reference should raise an
        # exception when the enclosing table tries to dereference it.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta")
        with self.assertRaises(ValueError):
            table = Table(
                name="testTable",
                id="#test_table",
                columns=[col],
                column_groups=[group],
            )

344 

345 

class ConstraintTestCase(unittest.TestCase):
    """Test Pydantic validation of the different constraint classes."""

    def test_base_constraint(self) -> None:
        """Test validation of base constraint type."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint")

        # Setting name and id should not throw an exception and should load
        # data correctly.
        Constraint(name="test_constraint", id="#test_constraint")

        # Setting initially without deferrable should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=False, initially="IMMEDIATE")

        # Setting a bad value for initially should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="BAD_VALUE")

        # Setting a valid value for initially should not throw an exception.
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="IMMEDIATE")
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="DEFERRED")

    def test_unique_constraint(self) -> None:
        """Test validation of unique constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="test_constraint", id="#test_constraint")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"])
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "uniq_test", "id": "#uniq_test", "columns": ["test_column"]}
        constraint = UniqueConstraint(**data)
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

    def test_foreign_key_constraint(self) -> None:
        """Test validation of foreign key constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(name="fk_test", id="#fk_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = ForeignKeyConstraint(
            name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=["test_column"]
        )
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "fk_test",
            "id": "#fk_test",
            "columns": ["test_column"],
            "referenced_columns": ["test_column"],
        }
        constraint = ForeignKeyConstraint(**data)
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating a foreign key constraint with no columns should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=[], referenced_columns=["test_column"]
            )

        # Creating a foreign key constraint with no referenced columns should
        # raise an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=[]
            )

        # Creating a foreign key constraint where the number of foreign key
        # columns does not match the number of referenced columns should raise
        # an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test",
                id="#fk_test",
                columns=["test_column", "test_column2"],
                referenced_columns=["test_column"],
            )

    def test_check_constraint(self) -> None:
        """Test validation of check constraints."""
        # Setting name and id should throw an exception from missing
        # expression.
        with self.assertRaises(ValidationError):
            CheckConstraint(name="check_test", id="#check_test")

        # Setting name, id, and expression should not throw an exception and
        # should load data correctly.
        constraint = CheckConstraint(name="check_test", id="#check_test", expression="1+2")
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "check_test",
            "id": "#check_test",
            "expression": "1+2",
        }
        constraint = CheckConstraint(**data)
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        # Fixed copy-paste error: the failure message previously read
        # "id should be '#test_id'" while asserting '#check_test'.
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

    def test_bad_constraint_type(self) -> None:
        """Test that an invalid constraint ``type`` value is rejected."""
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"], type="BAD_TYPE")

    def test_constraint_column_checks(self) -> None:
        """Test the extra validation in the ``Schema`` that checks the
        constraint column references.
        """

        def _create_test_schema(constraint: Constraint) -> None:
            """Create a test schema with the given constraint."""
            test_col = Column(name="testColumn", id="#test_col_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col_id2", datatype="int")
            test_tbl = Table(
                name="testTable", id="#test_tbl_id", columns=[test_col, test_col2], constraints=[constraint]
            )
            test_col = Column(name="testColumn", id="#test_col2_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col2_id2", datatype="int")
            test_tbl2 = Table(name="testTable2", id="#test_tbl2_id", columns=[test_col, test_col2])
            Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl, test_tbl2])

        # Creating a unique constraint on a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["bad_column"])
            )

        # Creating a foreign key constraint with a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["bad_column"],
                    referenced_columns=["#test_col2_id"],
                )
            )

        # Creating a foreign key constraint with a bad referenced column should
        # raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["bad_column"],
                )
            )

        # Creating a foreign key constraint where the source column is not in
        # the same table as the constraint should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col2_id"],  # This column is in test_tbl2, not test_tbl
                    referenced_columns=["#test_col_id"],
                )
            )

        # Creating a foreign key constraint where the referenced column is not
        # a column object should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["#test_schema_id"],
                )
            )

        # Creating a valid unique constraint should not raise an exception.
        _create_test_schema(
            UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["#test_col_id"])
        )

        # Creating a valid foreign key constraint should not raise an
        # exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testForeignKey",
                id="#test_fk_id",
                columns=["#test_col_id"],
                referenced_columns=["#test_col2_id"],
            )
        )

        # Creating a foreign key constraint with a composite key should not
        # raise an exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testCompositeForeignKey",
                id="#test_composite_fk_id",
                columns=["#test_col_id", "#test_col_id2"],
                referenced_columns=["#test_col2_id", "#test_col2_id2"],
            )
        )

580 

581 

class IndexTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Index`` class."""

    def test_index_validation(self) -> None:
        """Test validation of indexes."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Index()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly. (Failure messages fixed: they were
        # copy-pasted from the constraint tests and did not match the values
        # actually asserted here.)
        idx = Index(name="idx_test", id="#idx_test", columns=["#test_column"])
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["#test_column"], "columns should be ['#test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "idx_test", "id": "#idx_test", "columns": ["test_column"]}
        idx = Index(**data)
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["test_column"], "columns should be ['test_column']")

        # Setting both columns and expressions on an index should throw an
        # exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test", columns=["test_column"], expressions=["1+2"])

617 

618 

619class SchemaTestCase(unittest.TestCase): 

620 """Test Pydantic validation of the ``Schema`` class.""" 

621 

    def test_validation(self) -> None:
        """Test Pydantic validation of the main schema class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Schema()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema")

        # Setting name and id should throw an exception from missing tables.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id")

        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_tbl_id", columns=[test_col])

        # Setting name, id, and tables should not throw an exception and
        # should load data correctly.
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.name, "testSchema", "name should be 'testSchema'")
        self.assertEqual(sch.id, "#test_sch_id", "id should be '#test_sch_id'")
        self.assertEqual(sch.tables, [test_tbl], "tables should be ['testTable']")

        # Creating a schema with duplicate table names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl, test_tbl])

        # Using an undefined YAML field should raise an exception.
        with self.assertRaises(ValidationError):
            Schema(**{"name": "testSchema", "id": "#test_sch_id", "bad_field": "1234"}, tables=[test_tbl])

        # Creating a schema containing duplicate IDs should raise an error.
        with self.assertRaises(ValidationError):
            Schema(
                name="testSchema",
                id="#test_sch_id",
                tables=[
                    Table(
                        name="testTable",
                        id="#test_tbl_id",
                        columns=[
                            Column(name="testColumn", id="#test_col_id", datatype="string"),
                            Column(name="testColumn2", id="#test_col_id", datatype="string"),
                        ],
                    )
                ],
            )

671 

672 def test_schema_object_ids(self) -> None: 

673 """Test that the ``id_map`` is properly populated.""" 

674 test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256) 

675 test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col]) 

676 sch = Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl]) 

677 

678 for id in ["#test_col_id", "#test_table_id", "#test_schema_id"]: 

679 # Test that the schema contains the expected id. 

680 self.assertTrue(id in sch, f"schema should contain '{id}'") 

681 

682 # Check that types of returned objects are correct. 

683 self.assertIsInstance(sch["#test_col_id"], Column, "schema[id] should return a Column") 

684 self.assertIsInstance(sch["#test_table_id"], Table, "schema[id] should return a Table") 

685 self.assertIsInstance(sch["#test_schema_id"], Schema, "schema[id] should return a Schema") 

686 

687 with self.assertRaises(KeyError): 

688 # Test that an invalid id raises an exception. 

689 sch["#bad_id"] 

690 

    def test_check_unique_constraint_names(self) -> None:
        """Test that constraint names are unique.

        Two constraints with the same name on one table should cause schema
        validation to fail.
        """
        test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256)
        test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col])
        test_cons = UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["testColumn"])
        test_cons2 = UniqueConstraint(
            name="testConstraint", id="#test_constraint2_id", columns=["testColumn"]
        )
        test_tbl.constraints = [test_cons, test_cons2]
        with self.assertRaises(ValidationError):
            Schema(name="testSchema", id="#test_id", tables=[test_tbl])

702 

    def test_check_unique_index_names(self) -> None:
        """Test that index names are unique.

        Two indexes with the same name on one table should cause schema
        validation to fail.
        """
        test_col = Column(name="test_column1", id="#test_table.test_column1", datatype="int")
        test_col2 = Column(name="test_column2", id="#test_table.test_column2", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_table", columns=[test_col, test_col2])
        test_idx = Index(name="idx_test", id="#idx_test", columns=[test_col.id])
        test_idx2 = Index(name="idx_test", id="#idx_test2", columns=[test_col2.id])
        test_tbl.indexes = [test_idx, test_idx2]
        with self.assertRaises(ValidationError):
            Schema(name="test_schema", id="#test-schema", tables=[test_tbl])

713 

714 def test_model_validate(self) -> None: 

715 """Load a YAML test file and validate the schema data model.""" 

716 with open(TEST_YAML) as test_yaml: 

717 data = yaml.safe_load(test_yaml) 

718 Schema.model_validate(data) 

719 

720 def test_id_generation(self) -> None: 

721 """Test ID generation.""" 

722 test_path = os.path.join(TEST_ID_GENERATION) 

723 with open(test_path) as test_yaml: 

724 yaml_data = yaml.safe_load(test_yaml) 

725 # Generate IDs for objects in the test schema. 

726 Schema.model_validate(yaml_data, context={"id_generation": True}) 

727 with open(test_path) as test_yaml: 

728 yaml_data = yaml.safe_load(test_yaml) 

729 # Test that an error is raised when id generation is disabled. 

730 with self.assertRaises(ValidationError): 

731 Schema.model_validate(yaml_data, context={"id_generation": False}) 

732 

    def test_get_table_by_column(self) -> None:
        """Test the ``get_table_by_column`` method."""
        # Test that the correct table is returned when searching by column.
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        self.assertEqual(sch.get_table_by_column(test_col), test_tbl)

        # Test that an error is raised when the column is not found in any
        # table of the schema.
        bad_col = Column(name="bad_column", id="#test_tbl.bad_column", datatype="string", length=256)
        with self.assertRaises(ValueError):
            sch.get_table_by_column(bad_col)

745 

    def test_find_object_by_id(self) -> None:
        """Test the ``find_object_by_id`` method."""
        # Build a minimal schema with one table and one column.
        test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
        test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col])
        sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl])
        # Looking up a known ID with the matching type returns the object.
        self.assertEqual(sch.find_object_by_id("#test_tbl.test_col", Column), test_col)
        # An unknown ID should raise KeyError.
        with self.assertRaises(KeyError):
            sch.find_object_by_id("#bad_id", Column)
        # A known ID whose object is not of the requested type should raise
        # TypeError.
        with self.assertRaises(TypeError):
            sch.find_object_by_id("#test_tbl", Column)

755 

756 def test_from_file(self) -> None: 

757 """Test loading a schema from a file.""" 

758 # Test file object. 

759 with open(TEST_SALES) as test_file: 

760 schema = Schema.from_stream(test_file) 

761 self.assertIsInstance(schema, Schema) 

762 

763 # Test path string. 

764 with open(TEST_SALES) as test_file: 

765 schema = Schema.from_stream(test_file) 

766 self.assertIsInstance(schema, Schema) 

767 

768 # Path object. 

769 test_file_path = pathlib.Path(TEST_SALES) 

770 schema = Schema.from_uri(test_file_path) 

771 self.assertIsInstance(schema, Schema) 

772 

773 def test_from_resource(self) -> None: 

774 """Test loading a schema from a resource.""" 

775 # Test loading a schema from a resource string. 

776 schema = Schema.from_uri( 

777 "resource://felis/config/tap_schema/tap_schema_std.yaml", context={"id_generation": True} 

778 ) 

779 self.assertIsInstance(schema, Schema) 

780 

781 # Test loading a schema from a ResourcePath. 

782 schema = Schema.from_uri( 

783 ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml"), 

784 context={"id_generation": True}, 

785 ) 

786 self.assertIsInstance(schema, Schema) 

787 

788 # Test loading from a nonexistant resource. 

789 with self.assertRaises(ValueError): 

790 Schema.from_uri("resource://fake/schemas/bad_schema.yaml") 

791 

792 # Without ID generation enabled, this schema should fail validation. 

793 with self.assertRaises(ValidationError): 

794 Schema.from_uri("resource://felis/config/tap_schema/tap_schema_std.yaml") 

795 

796 def test_find_table_by_name(self) -> None: 

797 """Test the ``_find_table_by_name`` method.""" 

798 # Create a simple schema with two tables 

799 test_col1 = Column(name="test_column1", id="#test_tbl1.test_col1", datatype="int") 

800 test_col2 = Column(name="test_column2", id="#test_tbl2.test_col2", datatype="string", length=256) 

801 test_tbl1 = Table(name="test_table1", id="#test_tbl1", columns=[test_col1]) 

802 test_tbl2 = Table(name="test_table2", id="#test_tbl2", columns=[test_col2]) 

803 sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl1, test_tbl2]) 

804 

805 # Test that the correct table is returned when searching by name 

806 self.assertEqual(sch._find_table_by_name("test_table1"), test_tbl1) 

807 self.assertEqual(sch._find_table_by_name("test_table2"), test_tbl2) 

808 

809 # Test that a KeyError is raised when the table is not found 

810 with self.assertRaises(KeyError): 

811 sch._find_table_by_name("nonexistent_table") 

812 

813 

class SchemaVersionTest(unittest.TestCase):
    """Test the schema version."""

    def test_validation(self) -> None:
        """Test validation of the schema version class."""
        # Default construction is invalid because 'current' is required.
        with self.assertRaises(ValidationError):
            SchemaVersion()

        # Supplying 'current' is sufficient and the value round-trips.
        version = SchemaVersion(current="1.0.0")
        self.assertEqual(version.current, "1.0.0", "current should be '1.0.0'")

        # A schema version may be specified as a bare string...
        schema = Schema.model_validate(
            {
                "name": "schema",
                "@id": "#schema",
                "tables": [],
                "version": "1.2.3",
            }
        )
        self.assertEqual(schema.version, "1.2.3")

        # ...or as a structured object with compatibility lists.
        schema = Schema.model_validate(
            {
                "name": "schema",
                "@id": "#schema",
                "tables": [],
                "version": {
                    "current": "1.2.3",
                    "compatible": ["1.2.0", "1.2.1", "1.2.2"],
                    "read_compatible": ["1.1.0", "1.1.1"],
                },
            }
        )
        self.assertEqual(schema.version.current, "1.2.3")
        self.assertEqual(schema.version.compatible, ["1.2.0", "1.2.1", "1.2.2"])
        self.assertEqual(schema.version.read_compatible, ["1.1.0", "1.1.1"])

853 

854 

class ValidationFlagsTest(unittest.TestCase):
    """Test optional validation flags on the schema."""

    def test_check_tap_table_indexes(self) -> None:
        """Test the ``check_tap_table_indexes`` validation flag."""
        cxt = {"check_tap_table_indexes": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a schema without a TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with a TAP table index should not throw.
        schema_dict["tables"][0]["tap_table_index"] = 1
        Schema.model_validate(schema_dict, context=cxt)
        schema_dict["tables"].append(
            {
                "name": "test_table2",
                "id": "#test_table2",
                "tap_table_index": 1,
                "columns": [{"name": "test_col2", "id": "#test_col2", "datatype": "int"}],
            }
        )

        # Creating a schema with a duplicate TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Multiple, unique TAP table indexes should not throw.
        schema_dict["tables"][1]["tap_table_index"] = 2
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_tap_principal(self) -> None:
        """Test the ``check_tap_principal`` validation flag."""
        # Fixed the docstring above: the original was missing the closing
        # double backtick on the flag name.
        cxt = {"check_tap_principal": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a table without a TAP table principal column should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a table with a TAP table principal column should not throw.
        schema_dict["tables"][0]["columns"][0]["tap_principal"] = 1
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_description(self) -> None:
        """Test the ``check_description`` flag."""
        cxt = {"check_description": True}
        schema_dict = {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

        # Creating a schema without object descriptions should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with object descriptions should not throw.
        schema_dict["description"] = "Test schema"
        schema_dict["tables"][0]["description"] = "Test table"
        schema_dict["tables"][0]["columns"][0]["description"] = "Test column"
        Schema.model_validate(schema_dict, context=cxt)

944 

945 

class RedundantDatatypesTest(unittest.TestCase):
    """Test validation of redundant datatype definitions."""

    def test_mysql_datatypes(self) -> None:
        """Test that redundant datatype definitions raise an error."""
        # NOTE: the docstring above was previously a stray string literal
        # sitting in the middle of the method body; it has been moved to
        # its proper position as the method docstring (a no-op change).

        class ColumnGenerator:
            """Generate column data for redundant datatype testing."""

            def __init__(self, name, id, db_name):
                self.name = name
                self.id = id
                self.db_name = db_name
                self.context = {"check_redundant_datatypes": True}

            def col(self, datatype: str, db_datatype: str, length=None):
                # Build a column dict with a database-specific datatype
                # override (e.g. "mysql:datatype") and validate it with
                # redundancy checking enabled.
                return Column.model_validate(
                    {
                        "name": self.name,
                        "@id": self.id,
                        "datatype": datatype,
                        f"{self.db_name}:datatype": db_datatype,
                        "length": length,
                    },
                    context=self.context,
                )

        coldata = ColumnGenerator("test_col", "#test_col_id", "mysql")

        # Each pairing below maps a Felis datatype to the MySQL type it
        # already implies, so the explicit override is redundant and must
        # raise a ValidationError.
        with self.assertRaises(ValidationError):
            coldata.col("double", "DOUBLE")

        with self.assertRaises(ValidationError):
            coldata.col("int", "INTEGER")

        with self.assertRaises(ValidationError):
            coldata.col("float", "FLOAT")

        with self.assertRaises(ValidationError):
            coldata.col("char", "CHAR", length=8)

        with self.assertRaises(ValidationError):
            coldata.col("string", "VARCHAR", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("byte", "TINYINT")

        with self.assertRaises(ValidationError):
            coldata.col("short", "SMALLINT")

        with self.assertRaises(ValidationError):
            coldata.col("long", "BIGINT")

        with self.assertRaises(ValidationError):
            coldata.col("boolean", "BOOLEAN")

        with self.assertRaises(ValidationError):
            coldata.col("unicode", "NVARCHAR", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("timestamp", "DATETIME")

        # DM-42257: Felis does not handle unbounded text types properly.
        # coldata.col("text", "TEXT", length=32)

        with self.assertRaises(ValidationError):
            coldata.col("binary", "LONGBLOB", length=1024)

        with self.assertRaises(ValidationError):
            # Same type and length
            coldata.col("string", "VARCHAR(128)", length=128)

        # Check the old type mapping for MySQL, which is now okay
        coldata.col("boolean", "BIT(1)")

        # Different types, which is okay
        coldata.col("double", "FLOAT")

        # Same base type with different lengths, which is okay
        coldata.col("string", "VARCHAR(128)", length=32)

        # Different string types, which is okay
        coldata.col("string", "CHAR", length=32)
        coldata.col("unicode", "CHAR", length=32)

    def test_precision(self) -> None:
        """Test that precision is not allowed for datatypes other than
        timestamp.
        """
        with self.assertRaises(ValidationError):
            Column(**{"name": "testColumn", "@id": "#test_col_id", "datatype": "double", "precision": 6})

1036 

1037 

class SchemaSerializationTest(unittest.TestCase):
    """Test serialization and deserialization of the schema data model."""

    def test_serialization(self) -> None:
        """Test serialization of the schema data model.

        Round-trips the on-disk YAML through the data model and a
        temporary file, then compares the parsed contents.
        """
        # Read the original YAML content from the test_serialization.yaml file
        with open(TEST_SERIALIZATION) as file:
            original_yaml_content = file.read()

        # Load the schema from the original YAML content
        schema_out = Schema.from_uri(TEST_SERIALIZATION)
        serialized_data = schema_out.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)

        # Write the serialized data to a temporary YAML file. delete=False is
        # needed so the file can be reopened portably; register a cleanup so
        # the file is removed afterwards (the original test leaked it).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml", mode="w+") as temp_file:
            self.addCleanup(os.unlink, temp_file.name)
            yaml.dump(serialized_data, temp_file, default_flow_style=False, sort_keys=False)
            temp_file.seek(0)
            # Read the deserialized YAML content from the temporary file
            deserialized_yaml_content = temp_file.read()

        # Show the differences between the original and deserialized YAML
        diff = difflib.unified_diff(
            original_yaml_content.splitlines(keepends=True),
            deserialized_yaml_content.splitlines(keepends=True),
            fromfile="original.yaml",
            tofile="deserialized.yaml",
        )
        print("Differences:\n", "".join(diff))

        # Assert that the original and deserialized YAML are the same
        self.assertEqual(
            yaml.safe_load(original_yaml_content),
            yaml.safe_load(deserialized_yaml_content),
            "The original and deserialized YAML contents should be the same",
        )

1073 

1074 

class ResourceTestCase(unittest.TestCase):
    """Test loading of column definitions from external schema resources."""

    def setUp(self) -> None:
        """Set up test resources.

        Writes a source schema file and a referencing schema file (whose
        ``resources`` section points at the source schema) into a fresh
        temporary directory for each test.
        """
        self.temp_dir = tempfile.mkdtemp()

        # Write out source schema file
        source_schema_content = """
name: source_schema
description: Test resource schema
tables:
- name: source_table
  description: Source table
  columns:
  - name: test_column
    datatype: int
    description: "Test column"
"""
        self.source_schema_path = os.path.join(self.temp_dir, "source_schema.yaml")
        with open(self.source_schema_path, "w") as f:
            f.write(source_schema_content.strip())

        # Write out referencing schema file
        ref_schema_content = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  description: Referencing table
  columnRefs:
    source_schema:
      source_table:
        test_column: null # Explicit null = no overrides, use same name
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            datatype: short
            tap:principal: 1
            tap:column_index: 2
"""
        self.ref_schema_path = os.path.join(self.temp_dir, "ref_schema.yaml")
        ref_content = ref_schema_content.format(resource_path=self.source_schema_path)
        with open(self.ref_schema_path, "w") as f:
            f.write(ref_content.strip())

    def tearDown(self) -> None:
        """Clean up test resources."""
        shutil.rmtree(self.temp_dir)

    def test_schema_resource(self) -> None:
        """Test loading a schema as a resource with column references."""
        # First test that the source schema loads correctly on its own
        source_schema = Schema.from_uri(self.source_schema_path, context={"id_generation": True})
        self.assertEqual(source_schema.name, "source_schema")
        self.assertEqual(len(source_schema.tables), 1)
        self.assertEqual(source_schema.tables[0].name, "source_table")

        # Now test loading the ref schema
        ref_schema = Schema.from_uri(self.ref_schema_path, context={"id_generation": True})
        self.assertEqual(ref_schema.name, "ref_schema")

        # Check that the resource was loaded
        self.assertIn("source_schema", ref_schema._resource_map)

        # Check that the referencing table has the expected columns
        ref_table = ref_schema.tables[0]
        self.assertEqual(ref_table.name, "ref_table")

        # Check the column_refs structure
        column_refs = ref_table.column_refs
        self.assertIsNotNone(column_refs)
        self.assertIsInstance(column_refs, dict)

        # Check the schema resource reference
        self.assertIn("source_schema", column_refs)
        source_schema_refs = column_refs["source_schema"]
        self.assertIsInstance(source_schema_refs, dict)

        # Check the table reference
        self.assertIn("source_table", source_schema_refs)
        source_table_refs = source_schema_refs["source_table"]
        self.assertIsInstance(source_table_refs, dict)

        # Verify the column_refs structure details
        # Should have 2 column references: test_column and renamed_column
        self.assertEqual(len(source_table_refs), 2)

        # Check test_column reference (null/no overrides)
        self.assertIn("test_column", source_table_refs)
        test_column_ref = source_table_refs["test_column"]
        self.assertIsNone(test_column_ref)

        # Check renamed_column reference (with ref_name and overrides)
        self.assertIn("renamed_column", source_table_refs)
        renamed_column_ref = source_table_refs["renamed_column"]
        self.assertIsNotNone(renamed_column_ref)
        self.assertEqual(renamed_column_ref.ref_name, "test_column")
        self.assertIsNotNone(renamed_column_ref.overrides)
        self.assertEqual(renamed_column_ref.overrides.description, "Renamed test column")
        self.assertEqual(renamed_column_ref.overrides.tap_principal, 1)
        self.assertEqual(renamed_column_ref.overrides.tap_column_index, 2)
        self.assertEqual(renamed_column_ref.overrides.datatype.value, "short")

        # Now check structure of dereferenced columns in the ref_table
        self.assertEqual(len(ref_table.columns), 2)

        # Check dereferenced test_column (no overrides)
        test_col = next((col for col in ref_table.columns if col.name == "test_column"), None)
        self.assertIsNotNone(test_col)
        self.assertEqual(test_col.datatype, "int")
        self.assertEqual(test_col.description, "Test column")

        # Check dereferenced renamed_column (includes overrides)
        renamed_col = next((col for col in ref_table.columns if col.name == "renamed_column"), None)
        self.assertIsNotNone(renamed_col)
        self.assertEqual(renamed_col.datatype, "short")  # Inherited from source
        self.assertEqual(renamed_col.description, "Renamed test column")
        self.assertEqual(renamed_col.tap_principal, 1)
        self.assertEqual(renamed_col.tap_column_index, 2)

        # Verify that the columns are present in the ID map
        try:
            ref_schema.find_object_by_id(test_col.id, Column)
        except KeyError:
            self.fail(f"Test column ID '{test_col.id}' not found in schema ID map.")
        try:
            ref_schema.find_object_by_id(renamed_col.id, Column)
        except KeyError:
            self.fail(f"Renamed column ID '{renamed_col.id}' not found in schema ID map.")

    def test_schema_resource_missing_column_error(self) -> None:
        """Test that referencing a non-existent column raises an error."""
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing schema with error
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad reference
  columnRefs:
    source_schema:
      source_table:
        bad_column: null # This column doesn't exist in source
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        # This should raise a ValueError
        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})

        self.assertIn("Column 'bad_column' not found", str(cm.exception))

    def test_schema_resource_missing_ref_name_error(self) -> None:
        """Test that using ref_name for non-existent column raises an error."""
        # Create a ref schema with bad ref_name
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing schema with bad ref_name
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad ref_name
  columnRefs:
    source_schema:
      source_table:
        some_column:
          ref_name: nonexistent_column # This column doesn't exist
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Column 'nonexistent_column' not found", str(cm.exception))

    def test_schema_resource_not_found_error(self) -> None:
        """Test that referencing a non-existent schema resource raises an
        error.
        """
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing non-existent schema resource
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad schema resource reference
  columnRefs:
    nonexistent_schema: # This schema resource doesn't exist
      some_table:
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Schema resource 'nonexistent_schema' was not found in resources", str(cm.exception))

    def test_schema_resource_table_not_found_error(self) -> None:
        """Test that referencing a non-existent table in schema resource raises
        an error.
        """
        error_ref_content = f"""
name: error_ref_schema
description: Test referencing non-existent table in schema resource
resources:
  source_schema:
    uri: {self.source_schema_path}
tables:
- name: error_table
  description: Table with bad table reference
  columnRefs:
    source_schema:
      nonexistent_table: # This table doesn't exist in source_schema
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn("Table 'nonexistent_table' not found in resource 'source_schema'", str(cm.exception))

    def test_schema_resource_bad_uri_error(self) -> None:
        """Test that a bad URI in resource loading raises an error."""
        error_ref_content = """
name: error_ref_schema
description: Test schema with bad resource URI
resources:
  bad_resource:
    uri: /nonexistent/path/to/schema.yaml
tables:
- name: error_table
  description: Table referencing bad resource
  columnRefs:
    bad_resource:
      some_table:
        some_column: null
"""

        error_ref_path = os.path.join(self.temp_dir, "error_ref_schema.yaml")
        with open(error_ref_path, "w") as f:
            f.write(error_ref_content.strip())

        with self.assertRaises(ValueError) as cm:
            Schema.from_uri(error_ref_path, context={"id_generation": True})
        self.assertIn(
            "Failed to load resource 'bad_resource' from URI '/nonexistent/path/to/schema.yaml'",
            str(cm.exception),
        )

    def test_ref_schema_with_indexes(self) -> None:
        """Test that indexes are properly handled when loading schema
        resources.
        """
        # Write out referencing schema file
        ref_schema_content_with_indexes = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  description: Referencing table
  columnRefs:
    source_schema:
      source_table:
        test_column: null
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            tap:principal: 1
            tap:column_index: 2
  indexes:
  - name: idx_test_column
    columns:
    - "#ref_table.test_column"
  - name: idx_renamed_column
    columns:
    - "#ref_table.renamed_column"
"""

        source_schema_with_indexes_path = os.path.join(self.temp_dir, "source_schema_with_indexes.yaml")
        ref_content = ref_schema_content_with_indexes.format(resource_path=self.source_schema_path)
        with open(source_schema_with_indexes_path, "w") as f:
            f.write(ref_content.strip())

        ref_schema = Schema.from_uri(source_schema_with_indexes_path, context={"id_generation": True})

        # Check index content; columns are not automatically resolved to
        # objects by the validation.
        indexes = ref_schema.tables[0].indexes
        self.assertEqual(len(indexes), 2)
        self.assertEqual(indexes[0].name, "idx_test_column")
        self.assertEqual(indexes[0].columns, ["#ref_table.test_column"])
        self.assertEqual(indexes[1].name, "idx_renamed_column")
        self.assertEqual(indexes[1].columns, ["#ref_table.renamed_column"])

    def test_ref_schema_with_foreign_key(self) -> None:
        """Test that foreign key constraints are properly handled when loading
        schema resources.
        """
        # Write out referencing schema file
        ref_schema_content_with_foreign_key = """
name: ref_schema
description: Test referencing schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: src_table
  description: Source table for foreign key
  primaryKey: "#src_table.test_column"
  columnRefs:
    source_schema:
      source_table:
        test_column: null
        renamed_column:
          ref_name: test_column
          overrides:
            description: "Renamed test column"
            tap:principal: 1
            tap:column_index: 2
- name: target_table
  description: Target table for foreign key
  columns:
  - name: fk_column
    datatype: int
    description: "Foreign key column"
  constraints:
  - name: fk_src_table
    '@type': ForeignKey
    columns:
    - "#target_table.fk_column"
    referencedColumns:
    - "#src_table.test_column"
"""

        source_schema_with_foreign_key_path = os.path.join(
            self.temp_dir, "source_schema_with_foreign_key.yaml"
        )
        ref_content = ref_schema_content_with_foreign_key.format(resource_path=self.source_schema_path)
        with open(source_schema_with_foreign_key_path, "w") as f:
            f.write(ref_content.strip())

        ref_schema = Schema.from_uri(source_schema_with_foreign_key_path, context={"id_generation": True})

        # Check foreign key constraint content
        fk_constraint = ref_schema.tables[1].constraints[0]
        self.assertIsInstance(fk_constraint, ForeignKeyConstraint)
        self.assertEqual(fk_constraint.name, "fk_src_table")
        self.assertEqual(fk_constraint.columns, ["#target_table.fk_column"])
        self.assertEqual(fk_constraint.referenced_columns, ["#src_table.test_column"])

    def test_ref_schema_serialization(self) -> None:
        """Test serialization of a reference schema."""
        # Load the referencing schema and then serialize it back to YAML
        ref_schema = Schema.from_uri(self.ref_schema_path, context={"id_generation": True})
        yaml_data = ref_schema.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)
        serialized_schema_path = os.path.join(self.temp_dir, "serialized_ref_schema.yaml")
        with open(serialized_schema_path, "w") as f:
            yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

        # Read back the serialized YAML data
        with open(serialized_schema_path) as f:
            serialized_yaml_data = yaml.safe_load(f)

        # Ensure that columns were not serialized directly in the table
        self.assertEqual(len(serialized_yaml_data["tables"][0]["columns"]), 0)

        # Deserialize the schema and check that the expected columns are
        # present
        deserialized_schema = Schema.from_uri(serialized_schema_path, context={"id_generation": True})
        self.assertEqual(len(deserialized_schema.tables[0].columns), 2)

        # Check that the columnRefs structure is still present
        try:
            ref_columns = deserialized_schema.tables[0].column_refs["source_schema"]["source_table"]
        except Exception:
            self.fail("The column refs are missing after deserialization.")
        self.assertEqual(len(ref_columns), 2)

    def test_ref_schema_with_dereference_columns(self) -> None:
        """Test loading a reference schema with dereferencing of columns so
        that column_refs is set to empty after loading.
        """
        ref_schema = Schema.from_uri(
            self.ref_schema_path, context={"id_generation": True, "dereference_resources": True}
        )

        # Check that the columns were dereferenced into the table
        ref_table = ref_schema.tables[0]
        self.assertEqual(len(ref_table.columns), 2)
        col_names = {col.name for col in ref_table.columns}
        self.assertIn("test_column", col_names)
        self.assertIn("renamed_column", col_names)

        # Check that column_refs is empty after dereferencing
        self.assertEqual(len(ref_table.column_refs), 0)

    def test_tap_column_index_with_overrides(self) -> None:
        """Test that TAP column index is correctly assigned when an override
        of that field is present in the column ref.
        """
        # Write out source schema file
        source_schema_content = """
name: source_schema
tables:
- name: source_table
  columns:
  - name: col1
    datatype: int
  - name: col2
    datatype: int
  - name: col3
    datatype: int
"""
        source_schema_path = os.path.join(self.temp_dir, "source_schema.yaml")
        with open(source_schema_path, "w") as f:
            f.write(source_schema_content.strip())

        # Write out referencing schema file
        ref_schema_content = """
name: ref_schema
resources:
  source_schema:
    uri: {resource_path}
tables:
- name: ref_table
  columnRefs:
    source_schema:
      source_table:
        col1:
        col2:
          overrides:
            tap:column_index: 15
        col3:
"""
        ref_schema_path = os.path.join(self.temp_dir, "ref_schema.yaml")
        ref_content = ref_schema_content.format(resource_path=source_schema_path)
        with open(ref_schema_path, "w") as f:
            f.write(ref_content.strip())

        # With column_ref_index_increment=10, auto-assigned indexes step by
        # 10 (col1 -> 10, col3 -> 20) while col2 keeps its explicit override.
        ref_schema = Schema.from_uri(
            ref_schema_path,
            context={"id_generation": True, "column_ref_index_increment": 10},
        )

        for column in ref_schema.tables[0].columns:
            if column.name == "col1":
                self.assertEqual(column.tap_column_index, 10)
            elif column.name == "col2":
                self.assertEqual(column.tap_column_index, 15)
            elif column.name == "col3":
                self.assertEqual(column.tap_column_index, 20)
            else:
                self.fail(f"Unexpected column name: {column.name}")

1555 

class ColumnOverridesTestCase(unittest.TestCase):
    """Test application of overrides to a column, setting all allowed
    fields.
    """

    @staticmethod
    def _make_base_column(datatype: str = "char") -> Column:
        """Build the shared base column fixture used by the override tests.

        Parameters
        ----------
        datatype
            Felis datatype name for the column; defaults to ``"char"``.

        Returns
        -------
        Column
            A fully populated column with known values for every field that
            ``ColumnOverrides`` is allowed to change.
        """
        return Column(
            name="base_column",
            id="#base_column",
            description="Base column",
            datatype=datatype,
            length=64,
            nullable=False,
            tap_principal=1,
            tap_column_index=10,
        )

    def test_all_override_fields_exist_on_column(self) -> None:
        """Ensure every ColumnOverrides field corresponds to an attribute on
        Column.
        """
        override_fields = set(ColumnOverrides.model_fields)
        column_fields = set(Column.model_fields)

        missing = override_fields - column_fields

        self.assertFalse(
            missing,
            f"Column is missing attributes for override fields: {sorted(missing)}",
        )

    def test_overrides_all(self) -> None:
        """Test updating all allowed column fields from overrides."""
        # Create a base column
        base_column = self._make_base_column()

        # Override all allowed fields with different values
        overrides = ColumnOverrides(
            description="Ref column",
            datatype="string",
            length=256,
            nullable=True,
            tap_principal=0,
            tap_column_index=100,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that the attributes were updated correctly
        self.assertEqual(base_column.description, "Ref column")
        self.assertEqual(base_column.datatype, "string")
        self.assertEqual(base_column.length, 256)
        self.assertEqual(base_column.nullable, True)
        self.assertEqual(base_column.tap_principal, 0)
        self.assertEqual(base_column.tap_column_index, 100)

    def test_overrides_subset(self) -> None:
        """Test updating a subset of allowed column fields from overrides."""
        # Create a base column
        base_column = self._make_base_column()

        # Override only a subset of the allowed fields
        overrides = ColumnOverrides(
            description="Ref column",
            tap_column_index=100,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that only the overridden attributes changed; the rest keep
        # their original fixture values.
        self.assertEqual(base_column.description, "Ref column")
        self.assertEqual(base_column.datatype, "char")
        self.assertEqual(base_column.length, 64)
        self.assertEqual(base_column.nullable, False)
        self.assertEqual(base_column.tap_principal, 1)
        self.assertEqual(base_column.tap_column_index, 100)

    def test_overrides_default(self) -> None:
        """Test that applying the default overrides is a no-op."""
        # Create a base column
        base_column = self._make_base_column()

        # Apply an empty overrides object (nothing explicitly set)
        base_column._update_from_overrides(ColumnOverrides())

        # Check that the attributes remain unchanged
        self.assertEqual(base_column.description, "Base column")
        self.assertEqual(base_column.datatype, "char")
        self.assertEqual(base_column.length, 64)
        self.assertEqual(base_column.nullable, False)
        self.assertEqual(base_column.tap_principal, 1)
        self.assertEqual(base_column.tap_column_index, 10)

    def test_overrides_with_explicit_none_values(self) -> None:
        """Test that passing explicit None values in overrides does update
        the column attributes where allowed and raises errors if it is not.
        """
        # Create a base column
        base_column = self._make_base_column(datatype="int")

        # Create overrides with explicit None values for nullable fields
        overrides = ColumnOverrides(
            description=None,
            tap_column_index=None,
        )

        # Apply overrides
        base_column._update_from_overrides(overrides)

        # Check that the attributes were updated to None where allowed
        self.assertIsNone(base_column.description)
        self.assertIsNone(base_column.tap_column_index)

        # Check that setting non-nullable fields to None raise a specific
        # ValueError on ColumnOverrides creation
        for non_nullable_field in ("datatype", "length", "nullable", "tap_principal"):
            with self.assertRaisesRegex(
                ValueError,
                re.escape(f"The '{non_nullable_field}' field cannot be overridden to null"),
            ):
                ColumnOverrides(**{non_nullable_field: None})

    def test_extra_fields_in_overrides(self) -> None:
        """Test that extra fields in ColumnOverrides raise a
        ValidationError.
        """
        with self.assertRaises(ValidationError) as cm:
            ColumnOverrides(
                description="Test column",
                extra_field="This should not be allowed",
            )

        self.assertIn("Extra inputs are not permitted", str(cm.exception))

    def test_overrides_accept_alias_keys(self) -> None:
        """Test that alias keys for TAP fields are accepted and populate the
        corresponding model fields.
        """
        overrides = ColumnOverrides(**{"tap:principal": 1, "tap:column_index": 42})

        self.assertEqual(overrides.tap_principal, 1)
        self.assertEqual(overrides.tap_column_index, 42)

        # Ensure these count as explicitly provided (for model_fields_set
        # logic).
        self.assertIn("tap_principal", overrides.model_fields_set)
        self.assertIn("tap_column_index", overrides.model_fields_set)

    def test_datatype_deserialize_and_serialize(self) -> None:
        """Test that datatype is deserialized from a string to DataType and
        serialized back to a string.
        """
        overrides = ColumnOverrides(datatype="char")

        # Deserialization should yield a DataType instance (not a raw str).
        self.assertIsInstance(overrides.datatype, DataType)
        self.assertEqual(str(overrides.datatype), "char")

        # Serialization should produce a JSON-friendly string value.
        dumped = overrides.model_dump(mode="json")
        self.assertEqual(dumped["datatype"], "char")

        # None should remain None on serialization.
        overrides_none = ColumnOverrides()
        dumped_none = overrides_none.model_dump(mode="json")
        self.assertIsNone(dumped_none["datatype"])

    def test_non_nullable_overrides_data_is_none(self) -> None:
        """Test that passing None to ``_check_non_nullable_overrides`` does not
        raise an error.
        """
        ColumnOverrides()._check_non_nullable_overrides(None)

1755 

# Allow running this test module directly (e.g. ``python test_datamodel.py``)
# in addition to discovery via pytest/unittest.
if __name__ == "__main__":
    unittest.main()