Coverage for tests/test_datasets.py: 11%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import unittest
23import pickle
24import copy
26from lsst.daf.butler import (
27 DataCoordinate,
28 DatasetType,
29 DatasetRef,
30 DimensionUniverse,
31 StorageClass,
32 StorageClassFactory,
33)
35"""Tests for datasets module.
36"""
class DatasetTypeTestCase(unittest.TestCase):
    """Tests for `DatasetType`.

    Exercises construction (from a `StorageClass` instance and from a
    registered storage-class name), name validation, equality, hashing,
    sorting, deep copy, pickling, JSON round-tripping, and composite
    (parent/component) dataset types.
    """
    def setUp(self):
        # Fresh default dimension universe for every test method.
        self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component dataset type ("name.comp") must be given a parent
        # storage class, and a non-component must not be given one.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                        dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName,
                        dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Construction by name requires the storage class to be registered
        # with the factory first.
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass("test_StructuredData",
                                    components={n: StorageClass("component") for n in goodNames})

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                # Components inherit the composite's storage class as parent.
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            # Bad component suffixes must be rejected even with a good parent.
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        # Equality must hold whether the storage class is given as an
        # instance or as its name, and must depend on dataset type name,
        # dimensions, storage class, and parent storage class.
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, "test_a",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, storageB,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, "test_b",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, "test_a",))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"))

    def testJson(self):
        # to_json/from_json round trip must preserve equality, with and
        # without a parent storage class.
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType.from_json(DatasetType("a", dimensionsA, storageA,).to_json(),
                                               self.universe))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType.from_json(DatasetType("a.b", dimensionsA, "test_b",
                                                           parentStorageClass="parent").to_json(),
                                               self.universe))

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        # Sorting orders by dataset type name here ("a" < "f" < "p").
        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        # 2 names x 2 storage classes x 2 dimension sets = 8 unique
        # combinations, each constructed twice.
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument", ), ("skymap", )]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(hash(DatasetType("a", dimensions, storageC)),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertEqual(hash(DatasetType("a", dimensions, "test_c")),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageC)),
                            hash(DatasetType("a", dimensions, "test_d")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageD)),
                            hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, "test_c")),
                            hash(DatasetType("a", dimensions, "test_d")))

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self):
        """Test pickle support.
        """
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        # The calibration flag must survive the round trip too.
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name,
                         storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, "StrangeComponent",
                                           parentStorageClass="UnknownParent")
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        # Compare the private name attribute directly so the unregistered
        # parent storage class is never resolved.
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, factoryComponentStorageClassClass(),
                                           parentStorageClass=factoryStorageClassClass())
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass("test_composite", components={"compA": storageClassA,
                                                                  "compB": storageClassB})
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        # A composite is not a component, and vice versa.
        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        # Components report the composite's storage class as their parent;
        # the composite itself has no parent.
        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)
class DatasetRefTestCase(unittest.TestCase):
    """Tests for `DatasetRef`.

    Covers construction/validation, sorting, resolved/unresolved
    conversion, pickling, and JSON round-tripping.
    """

    def setUp(self):
        # Shared fixtures: a composite "Parent" storage class with
        # components "a" and "b", an (instrument, visit) dataset type,
        # and a matching data ID.
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass("Parent", components={"a": self.componentStorageClass1,
                                                                     "b": self.componentStorageClass2})
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values.
        """
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        # The plain dict data ID is standardized into a DataCoordinate.
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self):
        # A resolved ref (id + run) and its unresolved counterpart share
        # the dataset type and data ID but compare unequal; resolving the
        # unresolved ref with the same id/run restores the original.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        # Pickle round trip must preserve equality.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self):
        # JSON round trip must preserve equality.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()