Coverage for tests/test_datasets.py: 9%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

247 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

"""Tests for datasets module."""

import copy
import pickle
import unittest

from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    StorageClass,
    StorageClassFactory,
)

37 

38 

class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType."""

    def setUp(self):
        # A default dimension universe is sufficient for all these tests.
        self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component dataset type requires a parent storage class, and a
        # non-component dataset type must not have one.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                        dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName,
                        dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Name-based lookup requires the class be registered with the factory.
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass("test_StructuredData",
                                    components={n: StorageClass("component") for n in goodNames})

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        """Test that equality considers name, dimensions, storage class and
        parent storage class, whether given as instances or as names.
        """
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA),
                         DatasetType("a", dimensionsA, storageA))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a"),
                         DatasetType("a", dimensionsA, storageA))
        self.assertEqual(DatasetType("a", dimensionsA, storageA),
                         DatasetType("a", dimensionsA, "test_a"))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a"),
                         DatasetType("a", dimensionsA, "test_a"))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("b", dimensionsA, storageA))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("b", dimensionsA, "test_a"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("a", dimensionsA, storageB))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("a", dimensionsA, "test_b"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("a", dimensionsB, storageA))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA),
                            DatasetType("a", dimensionsB, "test_a"))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"))

    def testJson(self):
        """Test that a DatasetType round-trips through JSON."""
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA),
                         DatasetType.from_json(DatasetType("a", dimensionsA, storageA).to_json(),
                                               self.universe))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType.from_json(DatasetType("a.b", dimensionsA, "test_b",
                                                           parentStorageClass="parent").to_json(),
                                               self.universe))

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(hash(DatasetType("a", dimensions, storageC)),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertEqual(hash(DatasetType("a", dimensions, "test_c")),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageC)),
                            hash(DatasetType("a", dimensions, "test_d")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageD)),
                            hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, "test_c")),
                            hash(DatasetType("a", dimensions, "test_d")))

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self):
        """Test pickle support."""
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name,
                         storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, "StrangeComponent",
                                           parentStorageClass="UnknownParent")
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, factoryComponentStorageClassClass(),
                                           parentStorageClass=factoryStorageClassClass())
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass("test_composite", components={"compA": storageClassA,
                                                                  "compB": storageClassB})
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        # Components inherit the composite's storage class as their parent.
        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)


323 

class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef."""

    def setUp(self):
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        # A composite parent storage class with two components, shared by
        # all the tests below.
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass("Parent", components={"a": self.componentStorageClass1,
                                                                     "b": self.componentStorageClass2})
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values."""
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self):
        """Test the resolved/unresolved round trip of a DatasetRef."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        """Test that a resolved DatasetRef round-trips through pickle."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self):
        """Test that a resolved DatasetRef round-trips through JSON."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)


419 

if __name__ == "__main__":
    unittest.main()