Coverage for tests / test_datamodel.py: 12%

463 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:37 +0000

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import difflib 

23import os 

24import pathlib 

25import tempfile 

26import unittest 

27from collections import defaultdict 

28 

29import yaml 

30from lsst.resources import ResourcePath 

31from pydantic import ValidationError 

32 

33from felis.datamodel import ( 

34 CheckConstraint, 

35 Column, 

36 ColumnGroup, 

37 Constraint, 

38 DataType, 

39 ForeignKeyConstraint, 

40 Index, 

41 Schema, 

42 SchemaVersion, 

43 Table, 

44 UniqueConstraint, 

45) 

46 

# Directory that contains this test module; every fixture path hangs off it.
TEST_DIR = os.path.abspath(os.path.dirname(__file__))


def _fixture_path(filename: str) -> str:
    """Return the absolute path of ``filename`` in the ``data`` directory."""
    return os.path.join(TEST_DIR, "data", filename)


# YAML fixtures loaded by the test cases in this module.
TEST_YAML = _fixture_path("test.yml")
TEST_SALES = _fixture_path("sales.yaml")
TEST_SERIALIZATION = _fixture_path("test_serialization.yaml")
TEST_ID_GENERATION = _fixture_path("test_id_generation.yaml")

52 

53 

class ColumnTestCase(unittest.TestCase):
    """Test the ``Column`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Column`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Column()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Column(name="testColumn")

        # Setting name and id should throw an exception from missing datatype.
        with self.assertRaises(ValidationError):
            Column(name="testColumn", id="#test_id")

        # Setting name, id, and datatype should not throw an exception and
        # should load data correctly.
        col = Column(name="testColumn", id="#test_id", datatype="string", length=256)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "testColumn", "id": "#test_id", "datatype": "string", "length": 256}
        col = Column(**data)
        self.assertEqual(col.name, "testColumn", "name should be 'testColumn'")
        self.assertEqual(col.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(col.datatype, DataType.string, "datatype should be 'DataType.string'")

        # Setting a bad IVOA UCD should throw an error.
        with self.assertRaises(ValidationError):
            Column(**data, ivoa_ucd="bad")

        # Setting a valid IVOA UCD should not throw an error.
        col = Column(**data, ivoa_ucd="meta.id")
        self.assertEqual(col.ivoa_ucd, "meta.id", "ivoa_ucd should be 'meta.id'")

        units_data = data.copy()

        # Setting a bad IVOA unit should throw an error.
        units_data["ivoa:unit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid IVOA unit should not throw an error.
        units_data["ivoa:unit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.ivoa_unit, "m", "ivoa_unit should be 'm'")

        units_data = data.copy()

        # Setting a bad FITS TUNIT should throw an error.
        units_data["fits:tunit"] = "bad"
        with self.assertRaises(ValidationError):
            Column(**units_data)

        # Setting a valid FITS TUNIT should not throw an error.
        units_data["fits:tunit"] = "m"
        col = Column(**units_data)
        self.assertEqual(col.fits_tunit, "m", "fits_tunit should be 'm'")

        # Setting both IVOA unit and FITS TUNIT should throw an error.
        units_data["ivoa:unit"] = "m"
        with self.assertRaises(ValidationError):
            Column(**units_data)

    def test_description(self) -> None:
        """Test Pydantic validation of the ``description`` attribute."""
        # Creating a column with a description of 'None' should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": None,
                }
            )

        # Creating a column with an empty description should throw.
        with self.assertRaises(ValueError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "",
                }
            )

        # Creating a column with a description that is too short should throw.
        with self.assertRaises(ValidationError):
            Column(
                **{
                    "name": "testColumn",
                    "@id": "#test_col_id",
                    "datatype": "string",
                    "description": "xy",
                }
            )

    def test_values(self) -> None:
        """Test Pydantic validation of the ``value`` attribute.

        Every rejected datatype/value combination is checked in its own
        ``assertRaises`` block, wrapped in a ``subTest`` so that a failure
        reports which combination was wrongly accepted.
        """

        # Define a function to return the default column data
        def default_coldata():
            return defaultdict(str, {"name": "testColumn", "@id": "#test_col_id"})

        # Setting both value and autoincrement should throw.
        autoincr_coldata = default_coldata()
        autoincr_coldata["datatype"] = "int"
        autoincr_coldata["autoincrement"] = True
        autoincr_coldata["value"] = 1
        with self.assertRaises(ValueError):
            Column(**autoincr_coldata)

        # Setting an invalid default on a column with an integer type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["int", "long", "short", "byte"]:
            for value in ["bad", "1.0", "1", 1.1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.subTest(datatype=datatype, value=value), self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting an invalid default on a column with a decimal type should
        # throw.
        bad_numeric_coldata = default_coldata()
        for datatype in ["double", "float"]:
            for value in ["bad", "1.0", "1", 1]:
                bad_numeric_coldata["datatype"] = datatype
                bad_numeric_coldata["value"] = value
                with self.subTest(datatype=datatype, value=value), self.assertRaises(ValueError):
                    Column(**bad_numeric_coldata)

        # Setting a bad default on a string column should throw.
        bad_str_coldata = default_coldata()
        bad_str_coldata["length"] = 256
        for datatype in ["string", "char", "unicode", "text"]:
            for value in [1, 1.1, True, "", " ", " ", "\n", "\t"]:
                bad_str_coldata["datatype"] = datatype
                bad_str_coldata["value"] = value
                with self.subTest(datatype=datatype, value=value), self.assertRaises(ValueError):
                    Column(**bad_str_coldata)

        # Setting a non-boolean value on a boolean column should throw.
        # The loop has to be outside ``assertRaises``: the context manager
        # exits on the first exception, so a loop placed inside it would only
        # ever exercise the first value.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        for value in ["bad", 1, 1.1]:
            bool_coldata["value"] = value
            with self.subTest(value=value), self.assertRaises(ValueError):
                Column(**bool_coldata)

        # Setting a valid value on a string column should be okay.
        str_coldata = default_coldata()
        str_coldata["length"] = 256
        str_coldata["value"] = "okay"
        for datatype in ["string", "char", "unicode", "text"]:
            str_coldata["datatype"] = datatype
            Column(**str_coldata)

        # Setting an integer value on a column with an int type should be okay.
        int_coldata = default_coldata()
        int_coldata["value"] = 1
        for datatype in ["int", "long", "short", "byte"]:
            int_coldata["datatype"] = datatype
            Column(**int_coldata)

        # Setting a boolean value on a boolean column should be okay.
        bool_coldata = default_coldata()
        bool_coldata["datatype"] = "boolean"
        bool_coldata["value"] = True
        Column(**bool_coldata)

    def test_timestamp(self) -> None:
        """Test validation of timestamp columns."""
        # Check that the votable_xtype is set correctly for timestamp columns.
        col = Column(name="testColumn", id="#test_col_id", datatype="timestamp")
        self.assertEqual(col.votable_xtype, "timestamp")

239 

240 

class TableTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Table`` class."""

    def test_validation(self) -> None:
        """Test Pydantic validation of the ``Table`` class."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Table()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable")

        # Setting name and id should throw an exception from missing columns.
        # This must construct a ``Table``; building an ``Index`` here would
        # leave the missing-columns check on ``Table`` untested.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id")

        testCol = Column(name="testColumn", id="#test_id", datatype="string", length=256)

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        tbl = Table(name="testTable", id="#test_id", columns=[testCol])
        self.assertEqual(tbl.name, "testTable", "name should be 'testTable'")
        self.assertEqual(tbl.id, "#test_id", "id should be '#test_id'")
        self.assertEqual(tbl.columns, [testCol], "columns should be ['testColumn']")

        # Creating a table with duplicate column names should raise an
        # exception.
        with self.assertRaises(ValidationError):
            Table(name="testTable", id="#test_id", columns=[testCol, testCol])

271 

272 

class ColumnGroupTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``ColumnGroup`` class."""

    def test_validation(self) -> None:
        """Check construction, duplicate detection and column dereferencing
        of ``ColumnGroup``.
        """
        # No arguments, a name only, and a name plus id (no columns) must
        # each be rejected, in that order.
        incomplete_kwargs = (
            {},
            {"name": "testGroup"},
            {"name": "testGroup", "id": "#test_id"},
        )
        for kwargs in incomplete_kwargs:
            self.assertRaises(ValidationError, ColumnGroup, **kwargs)

        member = Column(name="testColumn", id="#test_col", datatype="string", length=256)

        def make_group(columns: list) -> ColumnGroup:
            """Build the standard test group around the given columns."""
            return ColumnGroup(name="testGroup", id="#test_group", columns=columns, ivoa_ucd="meta")

        def make_table(column_group: ColumnGroup) -> Table:
            """Build the standard test table holding the given group."""
            return Table(
                name="testTable",
                id="#test_table",
                columns=[member],
                column_groups=[column_group],
            )

        # A complete definition is accepted and its fields are loaded.
        grp = make_group([member])
        self.assertEqual(grp.name, "testGroup", "name should be 'testGroup'")
        self.assertEqual(grp.id, "#test_group", "id should be '#test_group'")
        self.assertEqual(grp.columns, [member], "columns should be ['testColumn']")

        # A group that is not attached to a table cannot dereference its
        # columns.
        self.assertRaises(ValueError, grp._dereference_columns)

        # The same column listed twice in one group is rejected.
        self.assertRaises(
            ValidationError,
            ColumnGroup,
            name="testGroup",
            id="#test_group",
            columns=[member, member],
        )

        # First pass: the group holds the column object itself.
        # Second pass: the group holds the column ID, which has to be
        # dereferenced once the group is assigned to a table.
        for column_refs in ([member], ["#test_col"]):
            grp = make_group(column_refs)
            tbl = make_table(grp)
            self.assertEqual(tbl.column_groups, [grp], "column_groups should be [group]")
            self.assertEqual(member, tbl.column_groups[0].columns[0], "column_groups[0] should be testCol")

        # A group naming a column that the table does not have is rejected
        # when the table is built.
        unresolved = make_group(["#bad_col"])
        self.assertRaises(ValueError, make_table, unresolved)

341 

342 

class ConstraintTestCase(unittest.TestCase):
    """Test Pydantic validation of the different constraint classes."""

    def test_base_constraint(self) -> None:
        """Test validation of base constraint type."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint")

        # Setting name and id should not throw an exception and should load
        # data correctly.
        Constraint(name="test_constraint", id="#test_constraint")

        # Setting initially without deferrable should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=False, initially="IMMEDIATE")

        # Setting a bad value for initially should throw an exception.
        with self.assertRaises(ValidationError):
            Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="BAD_VALUE")

        # Setting a valid value for initially should not throw an exception.
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="IMMEDIATE")
        Constraint(name="test_constraint", id="#test_constraint", deferrable=True, initially="DEFERRED")

    def test_unique_constraint(self) -> None:
        """Test validation of unique constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="test_constraint", id="#test_constraint")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"])
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "uniq_test", "id": "#uniq_test", "columns": ["test_column"]}
        constraint = UniqueConstraint(**data)
        self.assertEqual(constraint.name, "uniq_test", "name should be 'uniq_test'")
        self.assertEqual(constraint.id, "#uniq_test", "id should be '#uniq_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")

    def test_foreign_key_constraint(self) -> None:
        """Test validation of foreign key constraints."""
        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(name="fk_test", id="#fk_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        constraint = ForeignKeyConstraint(
            name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=["test_column"]
        )
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "fk_test",
            "id": "#fk_test",
            "columns": ["test_column"],
            "referenced_columns": ["test_column"],
        }
        constraint = ForeignKeyConstraint(**data)
        self.assertEqual(constraint.name, "fk_test", "name should be 'fk_test'")
        self.assertEqual(constraint.id, "#fk_test", "id should be '#fk_test'")
        self.assertEqual(constraint.columns, ["test_column"], "columns should be ['test_column']")
        self.assertEqual(
            constraint.referenced_columns, ["test_column"], "referenced_columns should be ['test_column']"
        )

        # Creating a foreign key constraint with no columns should raise an
        # exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=[], referenced_columns=["test_column"]
            )

        # Creating a foreign key constraint with no referenced columns should
        # raise an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test", id="#fk_test", columns=["test_column"], referenced_columns=[]
            )

        # Creating a foreign key constraint where the number of foreign key
        # columns does not match the number of referenced columns should raise
        # an exception.
        with self.assertRaises(ValidationError):
            ForeignKeyConstraint(
                name="fk_test",
                id="#fk_test",
                columns=["test_column", "test_column2"],
                referenced_columns=["test_column"],
            )

    def test_check_constraint(self) -> None:
        """Test validation of check constraints."""
        # Setting name and id should throw an exception from missing
        # expression.
        with self.assertRaises(ValidationError):
            CheckConstraint(name="check_test", id="#check_test")

        # Setting name, id, and expression should not throw an exception and
        # should load data correctly.
        constraint = CheckConstraint(name="check_test", id="#check_test", expression="1+2")
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

        # Creating from data dictionary should work and load data correctly.
        data = {
            "name": "check_test",
            "id": "#check_test",
            "expression": "1+2",
        }
        constraint = CheckConstraint(**data)
        self.assertEqual(constraint.name, "check_test", "name should be 'check_test'")
        self.assertEqual(constraint.id, "#check_test", "id should be '#check_test'")
        self.assertEqual(constraint.expression, "1+2", "expression should be '1+2'")

    def test_bad_constraint_type(self) -> None:
        """Test that an unrecognized ``type`` value on a constraint is
        rejected.
        """
        with self.assertRaises(ValidationError):
            UniqueConstraint(name="uniq_test", id="#uniq_test", columns=["test_column"], type="BAD_TYPE")

    def test_constraint_column_checks(self) -> None:
        """Test the extra validation in the ``Schema`` that checks the
        constraint column references.
        """

        def _create_test_schema(constraint: Constraint) -> None:
            """Create a test schema with the given constraint.

            The schema has two tables with two integer columns each. The
            constraint is attached to the first table; the second table only
            supplies columns that a foreign key can reference.
            """
            test_col = Column(name="testColumn", id="#test_col_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col_id2", datatype="int")
            test_tbl = Table(
                name="testTable", id="#test_tbl_id", columns=[test_col, test_col2], constraints=[constraint]
            )
            test_col = Column(name="testColumn", id="#test_col2_id", datatype="int")
            test_col2 = Column(name="testColumn2", id="#test_col2_id2", datatype="int")
            test_tbl2 = Table(name="testTable2", id="#test_tbl2_id", columns=[test_col, test_col2])
            Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl, test_tbl2])

        # Creating a unique constraint on a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["bad_column"])
            )

        # Creating a foreign key constraint with a bad column should raise an
        # exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["bad_column"],
                    referenced_columns=["#test_col2_id"],
                )
            )

        # Creating a foreign key constraint with a bad referenced column should
        # raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["bad_column"],
                )
            )

        # Creating a foreign key constraint where the source column is not in
        # the same table as the constraint should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col2_id"],  # This column is in test_tbl2, not test_tbl
                    referenced_columns=["#test_col_id"],
                )
            )

        # Creating a foreign key constraint where the referenced column is not
        # a column object should raise an exception.
        with self.assertRaises(ValidationError):
            _create_test_schema(
                ForeignKeyConstraint(
                    name="testForeignKey",
                    id="#test_fk_id",
                    columns=["#test_col_id"],
                    referenced_columns=["#test_schema_id"],
                )
            )

        # Creating a valid unique constraint should not raise an exception.
        _create_test_schema(
            UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["#test_col_id"])
        )

        # Creating a valid foreign key constraint should not raise an
        # exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testForeignKey",
                id="#test_fk_id",
                columns=["#test_col_id"],
                referenced_columns=["#test_col2_id"],
            )
        )

        # Creating a foreign key constraint with a composite key should not
        # raise an exception.
        _create_test_schema(
            ForeignKeyConstraint(
                name="testCompositeForeignKey",
                id="#test_composite_fk_id",
                columns=["#test_col_id", "#test_col_id2"],
                referenced_columns=["#test_col2_id", "#test_col2_id2"],
            )
        )

577 

578 

class IndexTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``Index`` class."""

    def test_index_validation(self) -> None:
        """Test validation of indexes."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            Index()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly. The failure messages name the values
        # that are actually compared so a failing run is not misleading.
        idx = Index(name="idx_test", id="#idx_test", columns=["#test_column"])
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["#test_column"], "columns should be ['#test_column']")

        # Creating from data dictionary should work and load data correctly.
        data = {"name": "idx_test", "id": "#idx_test", "columns": ["test_column"]}
        idx = Index(**data)
        self.assertEqual(idx.name, "idx_test", "name should be 'idx_test'")
        self.assertEqual(idx.id, "#idx_test", "id should be '#idx_test'")
        self.assertEqual(idx.columns, ["test_column"], "columns should be ['test_column']")

        # Setting both columns and expressions on an index should throw an
        # exception.
        with self.assertRaises(ValidationError):
            Index(name="idx_test", id="#idx_test", columns=["test_column"], expressions=["1+2"])

614 

615 

616class SchemaTestCase(unittest.TestCase): 

617 """Test Pydantic validation of the ``Schema`` class.""" 

618 

619 def test_validation(self) -> None: 

620 """Test Pydantic validation of the main schema class.""" 

621 # Default initialization should throw an exception. 

622 with self.assertRaises(ValidationError): 

623 Schema() 

624 

625 # Setting only name should throw an exception. 

626 with self.assertRaises(ValidationError): 

627 Schema(name="testSchema") 

628 

629 # Setting name and id should throw an exception from missing columns. 

630 with self.assertRaises(ValidationError): 

631 Schema(name="testSchema", id="#test_id") 

632 

633 test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256) 

634 test_tbl = Table(name="testTable", id="#test_tbl_id", columns=[test_col]) 

635 

636 # Setting name, id, and columns should not throw an exception and 

637 # should load data correctly. 

638 sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl]) 

639 self.assertEqual(sch.name, "testSchema", "name should be 'testSchema'") 

640 self.assertEqual(sch.id, "#test_sch_id", "id should be '#test_sch_id'") 

641 self.assertEqual(sch.tables, [test_tbl], "tables should be ['testTable']") 

642 

643 # Creating a schema with duplicate table names should raise an 

644 # exception. 

645 with self.assertRaises(ValidationError): 

646 Schema(name="testSchema", id="#test_id", tables=[test_tbl, test_tbl]) 

647 

648 # Using an undefined YAML field should raise an exception. 

649 with self.assertRaises(ValidationError): 

650 Schema(**{"name": "testSchema", "id": "#test_sch_id", "bad_field": "1234"}, tables=[test_tbl]) 

651 

652 # Creating a schema containing duplicate IDs should raise an error. 

653 with self.assertRaises(ValidationError): 

654 Schema( 

655 name="testSchema", 

656 id="#test_sch_id", 

657 tables=[ 

658 Table( 

659 name="testTable", 

660 id="#test_tbl_id", 

661 columns=[ 

662 Column(name="testColumn", id="#test_col_id", datatype="string"), 

663 Column(name="testColumn2", id="#test_col_id", datatype="string"), 

664 ], 

665 ) 

666 ], 

667 ) 

668 

669 def test_schema_object_ids(self) -> None: 

670 """Test that the ``id_map`` is properly populated.""" 

671 test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256) 

672 test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col]) 

673 sch = Schema(name="testSchema", id="#test_schema_id", tables=[test_tbl]) 

674 

675 for id in ["#test_col_id", "#test_table_id", "#test_schema_id"]: 

676 # Test that the schema contains the expected id. 

677 self.assertTrue(id in sch, f"schema should contain '{id}'") 

678 

679 # Check that types of returned objects are correct. 

680 self.assertIsInstance(sch["#test_col_id"], Column, "schema[id] should return a Column") 

681 self.assertIsInstance(sch["#test_table_id"], Table, "schema[id] should return a Table") 

682 self.assertIsInstance(sch["#test_schema_id"], Schema, "schema[id] should return a Schema") 

683 

684 with self.assertRaises(KeyError): 

685 # Test that an invalid id raises an exception. 

686 sch["#bad_id"] 

687 

688 def test_check_unique_constraint_names(self) -> None: 

689 """Test that constraint names are unique.""" 

690 test_col = Column(name="testColumn", id="#test_col_id", datatype="string", length=256) 

691 test_tbl = Table(name="testTable", id="#test_table_id", columns=[test_col]) 

692 test_cons = UniqueConstraint(name="testConstraint", id="#test_constraint_id", columns=["testColumn"]) 

693 test_cons2 = UniqueConstraint( 

694 name="testConstraint", id="#test_constraint2_id", columns=["testColumn"] 

695 ) 

696 test_tbl.constraints = [test_cons, test_cons2] 

697 with self.assertRaises(ValidationError): 

698 Schema(name="testSchema", id="#test_id", tables=[test_tbl]) 

699 

700 def test_check_unique_index_names(self) -> None: 

701 """Test that index names are unique.""" 

702 test_col = Column(name="test_column1", id="#test_table.test_column1", datatype="int") 

703 test_col2 = Column(name="test_column2", id="#test_table.test_column2", datatype="string", length=256) 

704 test_tbl = Table(name="test_table", id="#test_table", columns=[test_col, test_col2]) 

705 test_idx = Index(name="idx_test", id="#idx_test", columns=[test_col.id]) 

706 test_idx2 = Index(name="idx_test", id="#idx_test2", columns=[test_col2.id]) 

707 test_tbl.indexes = [test_idx, test_idx2] 

708 with self.assertRaises(ValidationError): 

709 Schema(name="test_schema", id="#test-schema", tables=[test_tbl]) 

710 

711 def test_model_validate(self) -> None: 

712 """Load a YAML test file and validate the schema data model.""" 

713 with open(TEST_YAML) as test_yaml: 

714 data = yaml.safe_load(test_yaml) 

715 Schema.model_validate(data) 

716 

717 def test_id_generation(self) -> None: 

718 """Test ID generation.""" 

719 test_path = os.path.join(TEST_ID_GENERATION) 

720 with open(test_path) as test_yaml: 

721 yaml_data = yaml.safe_load(test_yaml) 

722 # Generate IDs for objects in the test schema. 

723 Schema.model_validate(yaml_data, context={"id_generation": True}) 

724 with open(test_path) as test_yaml: 

725 yaml_data = yaml.safe_load(test_yaml) 

726 # Test that an error is raised when id generation is disabled. 

727 with self.assertRaises(ValidationError): 

728 Schema.model_validate(yaml_data, context={"id_generation": False}) 

729 

730 def test_get_table_by_column(self) -> None: 

731 """Test the ``get_table_by_column`` method.""" 

732 # Test that the correct table is returned when searching by column. 

733 test_col = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256) 

734 test_tbl = Table(name="test_table", id="#test_tbl", columns=[test_col]) 

735 sch = Schema(name="testSchema", id="#test_sch_id", tables=[test_tbl]) 

736 self.assertEqual(sch.get_table_by_column(test_col), test_tbl) 

737 

738 # Test that an error is raised when the column is not found. 

739 bad_col = Column(name="bad_column", id="#test_tbl.bad_column", datatype="string", length=256) 

740 with self.assertRaises(ValueError): 

741 sch.get_table_by_column(bad_col) 

742 

def test_find_object_by_id(self) -> None:
    """Test the ``find_object_by_id`` method."""
    column = Column(name="test_column", id="#test_tbl.test_col", datatype="string", length=256)
    table = Table(name="test_table", id="#test_tbl", columns=[column])
    schema = Schema(name="testSchema", id="#test_sch_id", tables=[table])

    # Looking up a column ID with the matching type returns that column.
    found = schema.find_object_by_id("#test_tbl.test_col", Column)
    self.assertEqual(found, column)

    # An ID that is not present in the schema raises a KeyError.
    with self.assertRaises(KeyError):
        schema.find_object_by_id("#bad_id", Column)

    # The table's ID requested as a Column raises a TypeError.
    with self.assertRaises(TypeError):
        schema.find_object_by_id("#test_tbl", Column)

752 

def test_from_file(self) -> None:
    """Test loading a schema from a file.

    Covers three ways of pointing at the same YAML file: an open file
    object passed to ``Schema.from_stream``, and a path string and a
    ``pathlib.Path`` passed to ``Schema.from_uri``.
    """
    # Test file object.
    with open(TEST_SALES) as test_file:
        schema = Schema.from_stream(test_file)
    self.assertIsInstance(schema, Schema)

    # Test path string. This case previously re-opened the file and called
    # ``from_stream`` again, so a plain path string was never exercised.
    schema = Schema.from_uri(TEST_SALES)
    self.assertIsInstance(schema, Schema)

    # Path object.
    test_file_path = pathlib.Path(TEST_SALES)
    schema = Schema.from_uri(test_file_path)
    self.assertIsInstance(schema, Schema)

769 

def test_from_resource(self) -> None:
    """Test loading a schema from a resource."""
    tap_schema_uri = "resource://felis/config/tap_schema/tap_schema_std.yaml"

    # A plain resource URI string is accepted.
    from_string = Schema.from_uri(tap_schema_uri, context={"id_generation": True})
    self.assertIsInstance(from_string, Schema)

    # A ResourcePath wrapping the same URI is accepted as well.
    from_resource_path = Schema.from_uri(ResourcePath(tap_schema_uri), context={"id_generation": True})
    self.assertIsInstance(from_resource_path, Schema)

    # Loading from a nonexistent resource raises a ValueError.
    with self.assertRaises(ValueError):
        Schema.from_uri("resource://fake/schemas/bad_schema.yaml")

    # Without ID generation enabled, this schema should fail validation.
    with self.assertRaises(ValidationError):
        Schema.from_uri(tap_schema_uri)

792 

793 

class SchemaVersionTest(unittest.TestCase):
    """Tests for ``SchemaVersion`` and the ``version`` field of ``Schema``."""

    def test_validation(self) -> None:
        """Test validation of the schema version class."""
        # Constructing a version without any fields must be rejected.
        with self.assertRaises(ValidationError):
            SchemaVersion()

        # Supplying only ``current`` is valid and the value is stored as given.
        version = SchemaVersion(current="1.0.0")
        self.assertEqual(version.current, "1.0.0", "current should be '1.0.0'")

        # The schema version may be written as a bare string ...
        string_form = {"name": "schema", "@id": "#schema", "tables": [], "version": "1.2.3"}
        schema = Schema.model_validate(string_form)
        self.assertEqual(schema.version, "1.2.3")

        # ... or as an object carrying the compatibility lists.
        object_form = {
            "name": "schema",
            "@id": "#schema",
            "tables": [],
            "version": {
                "current": "1.2.3",
                "compatible": ["1.2.0", "1.2.1", "1.2.2"],
                "read_compatible": ["1.1.0", "1.1.1"],
            },
        }
        schema = Schema.model_validate(object_form)
        parsed_version = schema.version
        self.assertEqual(parsed_version.current, "1.2.3")
        self.assertEqual(parsed_version.compatible, ["1.2.0", "1.2.1", "1.2.2"])
        self.assertEqual(parsed_version.read_compatible, ["1.1.0", "1.1.1"])

833 

834 

class ValidationFlagsTest(unittest.TestCase):
    """Test optional validation flags on the schema.

    Each test enables a single flag through the validation ``context`` and
    checks that a minimal schema is rejected until the data the flag
    requires has been added.
    """

    @staticmethod
    def _make_schema_dict() -> dict:
        """Return a minimal schema dict with one table holding one column.

        A new dict is built on every call because the tests mutate the
        returned structure in place.
        """
        return {
            "name": "testSchema",
            "id": "#test_schema_id",
            "tables": [
                {
                    "name": "test_table",
                    "id": "#test_table_id",
                    "columns": [{"name": "test_col", "id": "#test_col", "datatype": "int"}],
                }
            ],
        }

    def test_check_tap_table_indexes(self) -> None:
        """Test the ``check_tap_table_indexes`` validation flag."""
        cxt = {"check_tap_table_indexes": True}
        schema_dict = self._make_schema_dict()

        # Creating a schema without a TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with a TAP table index should not throw.
        schema_dict["tables"][0]["tap_table_index"] = 1
        Schema.model_validate(schema_dict, context=cxt)

        # Add a second table that reuses the first table's index.
        schema_dict["tables"].append(
            {
                "name": "test_table2",
                "id": "#test_table2",
                "tap_table_index": 1,
                "columns": [{"name": "test_col2", "id": "#test_col2", "datatype": "int"}],
            }
        )

        # Creating a schema with a duplicate TAP table index should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Multiple, unique TAP table indexes should not throw.
        schema_dict["tables"][1]["tap_table_index"] = 2
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_tap_principal(self) -> None:
        """Test the ``check_tap_principal`` validation flag."""
        cxt = {"check_tap_principal": True}
        schema_dict = self._make_schema_dict()

        # Creating a table without a TAP table principal column should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a table with a TAP table principal column should not throw.
        schema_dict["tables"][0]["columns"][0]["tap_principal"] = 1
        Schema.model_validate(schema_dict, context=cxt)

    def test_check_description(self) -> None:
        """Test the ``check_description`` flag."""
        cxt = {"check_description": True}
        schema_dict = self._make_schema_dict()

        # Creating a schema without object descriptions should throw.
        with self.assertRaises(ValidationError):
            Schema.model_validate(schema_dict, context=cxt)

        # Creating a schema with object descriptions should not throw.
        schema_dict["description"] = "Test schema"
        schema_dict["tables"][0]["description"] = "Test table"
        schema_dict["tables"][0]["columns"][0]["description"] = "Test column"
        Schema.model_validate(schema_dict, context=cxt)

924 

925 

class RedundantDatatypesTest(unittest.TestCase):
    """Test validation of redundant datatype definitions."""

    def test_mysql_datatypes(self) -> None:
        """Test that redundant datatype definitions raise an error.

        Columns are validated with the ``check_redundant_datatypes`` context
        flag enabled. Each case pairs a Felis ``datatype`` with a
        ``mysql:datatype`` override and an optional ``length``.
        """

        class ColumnGenerator:
            """Generate column data for redundant datatype testing."""

            def __init__(self, name, column_id, db_name):
                self.name = name
                self.id = column_id
                self.db_name = db_name
                self.context = {"check_redundant_datatypes": True}

            def col(self, datatype: str, db_datatype: str, length=None):
                """Validate and return a column using the given datatypes."""
                return Column.model_validate(
                    {
                        "name": self.name,
                        "@id": self.id,
                        "datatype": datatype,
                        f"{self.db_name}:datatype": db_datatype,
                        "length": length,
                    },
                    context=self.context,
                )

        coldata = ColumnGenerator("test_col", "#test_col_id", "mysql")

        # (datatype, mysql datatype, length) combinations that must be
        # rejected as redundant.
        # DM-42257: Felis does not handle unbounded text types properly, so
        # ("text", "TEXT", 32) is intentionally not part of this list.
        redundant_cases = [
            ("double", "DOUBLE", None),
            ("int", "INTEGER", None),
            ("float", "FLOAT", None),
            ("char", "CHAR", 8),
            ("string", "VARCHAR", 32),
            ("byte", "TINYINT", None),
            ("short", "SMALLINT", None),
            ("long", "BIGINT", None),
            ("boolean", "BOOLEAN", None),
            ("unicode", "NVARCHAR", 32),
            ("timestamp", "DATETIME", None),
            ("binary", "LONGBLOB", 1024),
            # Same type and length
            ("string", "VARCHAR(128)", 128),
        ]
        # subTest keeps going after a failing case and names the offending
        # combination in the report.
        for datatype, db_datatype, length in redundant_cases:
            with self.subTest(datatype=datatype, db_datatype=db_datatype, length=length):
                with self.assertRaises(ValidationError):
                    coldata.col(datatype, db_datatype, length=length)

        # Combinations that must validate without raising.
        allowed_cases = [
            # Check the old type mapping for MySQL, which is now okay
            ("boolean", "BIT(1)", None),
            # Different types, which is okay
            ("double", "FLOAT", None),
            # Same base type with different lengths, which is okay
            ("string", "VARCHAR(128)", 32),
            # Different string types, which is okay
            ("string", "CHAR", 32),
            ("unicode", "CHAR", 32),
        ]
        for datatype, db_datatype, length in allowed_cases:
            with self.subTest(datatype=datatype, db_datatype=db_datatype, length=length):
                coldata.col(datatype, db_datatype, length=length)

    def test_precision(self) -> None:
        """Test that precision is not allowed for datatypes other than
        timestamp.
        """
        with self.assertRaises(ValidationError):
            Column(**{"name": "testColumn", "@id": "#test_col_id", "datatype": "double", "precision": 6})

1016 

1017 

class SchemaSerializationTest(unittest.TestCase):
    """Test serialization and deserialization of the schema data model."""

    def test_serialization(self) -> None:
        """Test serialization of the schema data model.

        The schema is loaded from ``TEST_SERIALIZATION``, dumped with
        ``model_dump``, written out as YAML and read back. The parsed result
        must equal the parsed original file.
        """
        # Read the original YAML content from the test_serialization.yaml file
        with open(TEST_SERIALIZATION) as file:
            original_yaml_content = file.read()

        # Load the schema from the original YAML content
        schema_out = Schema.from_uri(TEST_SERIALIZATION)
        serialized_data = schema_out.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)

        # Write the serialized data to a temporary YAML file. The default
        # delete-on-close behavior is kept so that no file is left behind;
        # the content is read back through the same handle before it closes.
        with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+") as temp_file:
            yaml.dump(serialized_data, temp_file, default_flow_style=False, sort_keys=False)
            temp_file.seek(0)
            # Read the deserialized YAML content from the temporary file
            deserialized_yaml_content = temp_file.read()

        # Textual differences between the original and deserialized YAML.
        # They are only shown when the assertion below fails.
        diff = "".join(
            difflib.unified_diff(
                original_yaml_content.splitlines(keepends=True),
                deserialized_yaml_content.splitlines(keepends=True),
                fromfile="original.yaml",
                tofile="deserialized.yaml",
            )
        )

        # Assert that the original and deserialized YAML are the same
        self.assertEqual(
            yaml.safe_load(original_yaml_content),
            yaml.safe_load(deserialized_yaml_content),
            f"The original and deserialized YAML contents should be the same\nDifferences:\n{diff}",
        )

1053 

1054 

# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()