Coverage for tests/test_datasets.py: 11%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

247 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import copy 

23import pickle 

24import unittest 

25 

26from lsst.daf.butler import ( 

27 DataCoordinate, 

28 DatasetRef, 

29 DatasetType, 

30 DimensionUniverse, 

31 StorageClass, 

32 StorageClassFactory, 

33) 

34 

35"""Tests for datasets module. 

36""" 

37 

38 

class DatasetTypeTestCase(unittest.TestCase):
    """Tests for `DatasetType` construction, equality, hashing, sorting,
    copying, and serialization."""

    def setUp(self):
        # Fresh default dimension universe for each test method.
        self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component dataset type name requires a parent storage class,
        # and a non-component name must not have one.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Name-based lookup requires the class to be registered first.
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass(
            "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
        )

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        """Equality must treat StorageClass instances and their registered
        names interchangeably, and must compare name, dimensions, and
        parent storage class."""
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])

        # All four instance/name combinations must compare equal.
        for lhs in (storageA, "test_a"):
            for rhs in (storageA, "test_a"):
                self.assertEqual(
                    DatasetType("a", dimensionsA, lhs),
                    DatasetType("a", dimensionsA, rhs),
                )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
        )

        # Any difference in name, storage class, or dimensions breaks
        # equality, regardless of instance-vs-name spelling.
        reference = DatasetType("a", dimensionsA, storageA)
        for other in (
            DatasetType("b", dimensionsA, storageA),
            DatasetType("b", dimensionsA, "test_a"),
            DatasetType("a", dimensionsA, storageB),
            DatasetType("a", dimensionsA, "test_b"),
            DatasetType("a", dimensionsB, storageA),
            DatasetType("a", dimensionsB, "test_a"),
        ):
            self.assertNotEqual(reference, other)

        # Differing parent storage classes also break equality.
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
        )

    def testJson(self):
        """JSON round-trip must reproduce an equal DatasetType."""
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])

        plain = DatasetType("a", dimensionsA, storageA)
        self.assertEqual(plain, DatasetType.from_json(plain.to_json(), self.universe))

        component = DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent")
        self.assertEqual(component, DatasetType.from_json(component.to_json(), self.universe))

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        # Sorting is by dataset type name.
        ordered = sorted([d_p, d_f, d_a])
        self.assertEqual(ordered, [d_a, d_f, d_p])

        # Mixing in plain strings is not comparable.
        with self.assertRaises(TypeError):
            ordered = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])

        def hashed(storageClass):
            # Hash of a DatasetType built from the given storage class
            # (instance or name).
            return hash(DatasetType("a", dimensions, storageClass))

        self.assertEqual(hashed(storageC), hashed("test_c"))
        self.assertEqual(hashed("test_c"), hashed("test_c"))
        self.assertNotEqual(hashed(storageC), hashed("test_d"))
        self.assertNotEqual(hashed(storageD), hashed("test_c"))
        self.assertNotEqual(hashed("test_c"), hashed("test_d"))

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(copy.deepcopy(datasetType), datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        self.assertEqual(copy.deepcopy(componentDatasetType), componentDatasetType)

    def testPickle(self):
        """Test pickle support."""

        def roundtrip(obj):
            # Serialize and immediately deserialize with pickle.
            return pickle.loads(pickle.dumps(obj))

        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = roundtrip(datasetType)
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = roundtrip(datasetType)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        datasetTypeOut = roundtrip(componentDatasetType)
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            "StrangeComponent",
            parentStorageClass="UnknownParent",
        )
        datasetTypeOut = roundtrip(componentDatasetType)
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            factoryComponentStorageClassClass(),
            parentStorageClass=factoryStorageClassClass(),
        )
        datasetTypeOut = roundtrip(componentDatasetType)
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass(
            "test_composite", components={"compA": storageClassA, "compB": storageClassB}
        )
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        # Components inherit the composite's storage class as their parent.
        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)

457 

458 

class DatasetRefTestCase(unittest.TestCase):
    """Tests for `DatasetRef` construction, sorting, resolution, and
    serialization."""

    def setUp(self):
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        # A composite parent storage class with two components.
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass(
            "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
        )
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = {"instrument": "DummyCam", "visit": 42}
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values."""
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 1})
        ref2 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 10})
        ref3 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 22})

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        ordered = sorted([ref3, ref1, ref2])
        self.assertEqual(ordered, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in ordered]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 43}, run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 10}, run="b", id=2)
        ref2 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 4}, run="a", id=1)
        ref3 = DatasetRef(self.datasetType, {"instrument": "DummyCam", "visit": 104}, run="c", id=3)

        # This will sort them on run before visit
        ordered = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(ordered, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in ordered]}")

        # Now with strings
        with self.assertRaises(TypeError):
            ordered = sorted(["z", ref1, "c"])

    def testResolving(self):
        """Round-trip between resolved and unresolved refs."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        # Unresolving drops id and run but keeps type and data ID.
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        # Re-resolving with the same id/run recovers the original.
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        """Pickle round-trip must reproduce an equal ref."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        serialized = pickle.dumps(ref)
        self.assertEqual(pickle.loads(serialized), ref)

    def testJson(self):
        """JSON round-trip must reproduce an equal ref."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        serialized = ref.to_json()
        self.assertEqual(DatasetRef.from_json(serialized, universe=self.universe), ref)

554 

555 

# Allow the test suite to be run directly as a script.
if __name__ == "__main__":
    unittest.main()