Coverage for tests/test_datasets.py: 8%

332 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import copy 

29import pickle 

30import unittest 

31import uuid 

32 

33from lsst.daf.butler import ( 

34 DataCoordinate, 

35 DatasetRef, 

36 DatasetType, 

37 DimensionUniverse, 

38 FileDataset, 

39 StorageClass, 

40 StorageClassFactory, 

41) 

42from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo 

43 

# NOTE(review): this string appears *after* the imports, so Python treats it
# as a plain expression statement, not the module docstring (``__doc__`` is
# left unset). Moving it above the imports would make it the real docstring
# — confirm with the module owners before relocating.
"""Tests for datasets module.
"""

46 

47 

class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType."""

    def setUp(self) -> None:
        # Fresh default universe per test; dimension sets below are drawn
        # from it via ``extract``.
        self.universe = DimensionUniverse()

    def testConstructor(self) -> None:
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component name requires a parent storage class, and vice versa.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self) -> None:
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Name-based lookup only works once the class is registered.
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self) -> None:
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b", "_a")
        badNames = ("1", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass(
            "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
        )

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                assert component.parentStorageClass is not None
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self) -> None:
        """Test __eq__ across name/dimensions/storage-class combinations,
        mixing StorageClass instances with string names.
        """
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        # Identical arguments compare equal regardless of whether the
        # storage class is supplied as an instance or by name.
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        # Components: parent storage class participates in equality,
        # whether given as an instance or a name.
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
        )
        # The calibration flag also participates in equality.
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=True),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=False),
        )
        # Differing name, storage class, or dimensions each break equality.
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageB,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_b",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                "test_a",
            ),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
        )

    def testCompatibility(self) -> None:
        """Test is_compatible_with, which follows storage-class converters
        and is therefore directional.
        """
        storageA = StorageClass("test_a", pytype=set, converters={"list": "builtins.set"})
        storageB = StorageClass("test_b", pytype=list)
        storageC = StorageClass("test_c", pytype=dict)
        self.assertTrue(storageA.can_convert(storageB))
        dimensionsA = self.universe.extract(["instrument"])

        dA = DatasetType("a", dimensionsA, storageA)
        dA2 = DatasetType("a", dimensionsA, storageB)
        self.assertNotEqual(dA, dA2)
        self.assertTrue(dA.is_compatible_with(dA))
        # Compatibility is one-way: A can convert from B but not vice versa.
        self.assertTrue(dA.is_compatible_with(dA2))
        self.assertFalse(dA2.is_compatible_with(dA))

        dA3 = DatasetType("a", dimensionsA, storageC)
        self.assertFalse(dA.is_compatible_with(dA3))

    def testOverrideStorageClass(self) -> None:
        """Test overrideStorageClass, including round-tripping and
        preservation of the parent storage class on components.
        """
        storageA = StorageClass("test_a", pytype=list, converters={"dict": "builtins.list"})
        storageB = StorageClass("test_b", pytype=dict)
        dimensions = self.universe.extract(["instrument"])

        dA = DatasetType("a", dimensions, storageA)
        dB = dA.overrideStorageClass(storageB)
        self.assertNotEqual(dA, dB)
        self.assertEqual(dB.storageClass, storageB)

        round_trip = dB.overrideStorageClass(storageA)
        self.assertEqual(round_trip, dA)

        # Check that parents move over.
        parent = StorageClass("composite", components={"a": storageA, "c": storageA})
        dP = DatasetType("comp", dimensions, parent)
        dP_A = dP.makeComponentDatasetType("a")
        dp_B = dP_A.overrideStorageClass(storageB)
        self.assertEqual(dp_B.storageClass, storageB)
        self.assertEqual(dp_B.parentStorageClass, parent)

    def testJson(self) -> None:
        """Test that to_json/from_json round-trips, with and without a
        parent storage class.
        """
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType.from_json(
                DatasetType(
                    "a",
                    dimensionsA,
                    storageA,
                ).to_json(),
                self.universe,
            ),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType.from_json(
                DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent").to_json(),
                self.universe,
            ),
        )

    def testSorting(self) -> None:
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])  # type: ignore [list-item]

    def testHashability(self) -> None:
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types: list[DatasetType] = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dims in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.extract(dims), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dims), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_d"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageD)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_d"))
        )

    def testDeepCopy(self) -> None:
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self) -> None:
        """Test pickle support."""
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            "StrangeComponent",
            parentStorageClass="UnknownParent",
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            factoryComponentStorageClassClass(),
            parentStorageClass=factoryStorageClassClass(),
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

    def test_composites(self) -> None:
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass(
            "test_composite", components={"compA": storageClassA, "compB": storageClassB}
        )
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)

        # Requesting a component the storage class does not define fails.
        with self.assertRaises(KeyError):
            datasetTypeComposite.makeComponentDatasetType("compF")

513 

514 

class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef."""

    def setUp(self) -> None:
        # Build a composite parent storage class with two components and a
        # DatasetType/dataId pair shared by all the tests below. The storage
        # classes are registered with the (singleton) factory so that
        # pickling/serialization in later tests can resolve them by name.
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass(
            "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
        )
        sc_factory = StorageClassFactory()
        sc_factory.registerStorageClass(self.componentStorageClass1)
        sc_factory.registerStorageClass(self.componentStorageClass2)
        sc_factory.registerStorageClass(self.parentStorageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = DataCoordinate.standardize(
            dict(instrument="DummyCam", visit=42), universe=self.universe
        )
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def _make_datastore_records(self, ref: DatasetRef, *paths: str) -> DatasetRef:
        """Return an updated dataset ref with datastore records."""
        opaque_table_name = "datastore_records"
        # One StoredFileInfo per supplied path, all under a single opaque
        # table name.
        datastore_records = {
            opaque_table_name: [
                StoredFileInfo(
                    formatter="",
                    path=path,
                    storageClass=ref.datasetType.storageClass,
                    component=None,
                    checksum=None,
                    file_size=1,
                )
                for path in paths
            ]
        }
        return ref.replace(datastore_records=datastore_records)

    def testConstructor(self) -> None:
        """Test that construction preserves and validates values."""
        # Constructing a ref requires a run.
        with self.assertRaises(TypeError):
            DatasetRef(self.datasetType, self.dataId, id=uuid.uuid4())  # type: ignore [call-arg]

        # Constructing an unresolved ref with run and/or components should
        # issue a ref with an id.
        run = "somerun"
        ref = DatasetRef(self.datasetType, self.dataId, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsNotNone(ref.id)

        # Passing a data ID that is missing dimensions should fail.
        # Create a full DataCoordinate to ensure that we are testing the
        # right thing.
        dimensions = self.universe.extract(("instrument",))
        dataId = DataCoordinate.standardize(instrument="DummyCam", graph=dimensions)
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, dataId, run="run")
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        id_ = uuid.uuid4()
        ref = DatasetRef(self.datasetType, self.dataId, id=id_, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, id_)
        self.assertEqual(ref.run, run)

        # An unrecognized id_generation_mode value is rejected.
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run, id_generation_mode=42)  # type: ignore

    def testSorting(self) -> None:
        """Can we sort a DatasetRef"""
        # All refs have the same run.
        dimensions = self.universe.extract(("instrument", "visit"))
        ref1 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=1, graph=dimensions),
            run="run",
        )
        ref2 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=10, graph=dimensions),
            run="run",
        )
        ref3 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=22, graph=dimensions),
            run="run",
        )

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include different runs.
        ref1 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=43, graph=dimensions),
            run="b",
        )
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=10, graph=dimensions),
            run="b",
        )
        ref2 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=4, graph=dimensions),
            run="a",
        )
        ref3 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=104, graph=dimensions),
            run="c",
        )

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])  # type: ignore [list-item]

    def testOverrideStorageClass(self) -> None:
        # Overriding the storage class yields a distinct but compatible ref
        # that round-trips back to the original.
        storageA = StorageClass("test_a", pytype=list)

        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")

        ref_new = ref.overrideStorageClass(storageA)
        self.assertNotEqual(ref, ref_new)
        self.assertEqual(ref_new.datasetType.storageClass, storageA)
        self.assertEqual(ref_new.overrideStorageClass(ref.datasetType.storageClass), ref)
        self.assertTrue(ref.is_compatible_with(ref_new))
        # Comparing against a non-DatasetRef is an error, not False.
        with self.assertRaises(AttributeError):
            ref_new.is_compatible_with(None)  # type: ignore

        # Check different code paths of incompatibility.
        ref_incompat = DatasetRef(ref.datasetType, ref.dataId, run="somerun2", id=ref.id)
        self.assertFalse(ref.is_compatible_with(ref_incompat))  # bad run
        ref_incompat = DatasetRef(ref.datasetType, ref.dataId, run="somerun")
        self.assertFalse(ref.is_compatible_with(ref_incompat))  # bad ID
        self.assertFalse(ref.is_compatible_with(ref_incompat))  # bad ID

        incompatible_sc = StorageClass("my_int", pytype=int)
        with self.assertRaises(ValueError):
            # Do not test against "ref" because it has a default storage class
            # of "object" which is compatible with everything.
            ref_new.overrideStorageClass(incompatible_sc)

    def testReplace(self) -> None:
        """Test for `DatasetRef.replace` method."""
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")

        # Changing the run without passing an id produces a new id.
        ref2 = ref.replace(run="somerun2")
        self.assertEqual(ref2.run, "somerun2")
        self.assertIsNotNone(ref2.id)
        self.assertNotEqual(ref2.id, ref.id)

        # An explicit id is preserved.
        ref3 = ref.replace(run="somerun3", id=ref2.id)
        self.assertEqual(ref3.run, "somerun3")
        self.assertEqual(ref3.id, ref2.id)

        ref4 = ref.replace(id=ref2.id)
        self.assertEqual(ref4.run, "somerun")
        self.assertEqual(ref4.id, ref2.id)

        # replace() with no arguments is an identity operation.
        ref5 = ref.replace()
        self.assertEqual(ref5.run, "somerun")
        self.assertEqual(ref5, ref)

        # datastore_records can be set to a dict, or reset to None.
        self.assertIsNone(ref5._datastore_records)
        ref5 = ref5.replace(datastore_records={})
        self.assertEqual(ref5._datastore_records, {})
        ref5 = ref5.replace(datastore_records=None)
        self.assertIsNone(ref5._datastore_records)

    def testPickle(self) -> None:
        # A pickle round-trip preserves equality.
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self) -> None:
        # A JSON round-trip preserves equality.
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)

        # Also test ref with datastore records, serialization does not
        # preserve those.
        ref = self._make_datastore_records(ref, "/path1", "/path2")
        s = ref.to_json()
        ref2 = DatasetRef.from_json(s, universe=self.universe)
        self.assertEqual(ref2, ref)
        self.assertIsNone(ref2._datastore_records)

    def testFileDataset(self) -> None:
        # A single ref is normalized to a one-element list.
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        file_dataset = FileDataset(path="something.yaml", refs=ref)
        self.assertEqual(file_dataset.refs, [ref])

        # Refs with differing runs cannot share a FileDataset.
        ref2 = DatasetRef(self.datasetType, self.dataId, run="somerun2")
        with self.assertRaises(ValueError):
            FileDataset(path="other.yaml", refs=[ref, ref2])

728 

729 

# Allow the tests to be run directly as a script as well as via pytest.
if __name__ == "__main__":
    unittest.main()