Coverage for tests/test_datasets.py : 9%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import unittest
import pickle
import copy

from lsst.daf.butler import (
    DataCoordinate,
    DatasetType,
    DatasetRef,
    DimensionUniverse,
    StorageClass,
    StorageClassFactory,
)

"""Tests for datasets module.
"""


class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType.
    """
    def setUp(self):
        self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                        dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName,
                        dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass("test_StructuredData",
                                    components={n: StorageClass("component") for n in goodNames})

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        """Test equality and inequality comparisons for DatasetType."""
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, "test_a",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, storageB,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, "test_b",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, "test_a",))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"))

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testParentPlaceholder(self):
        """Test that a parent placeholder can be replaced."""
        storageComp = StorageClass("component")
        storageParent = StorageClass("Parent")
        dimensions = self.universe.extract(["instrument"])
        component = DatasetType("a.b", dimensions, storageComp,
                                parentStorageClass=DatasetType.PlaceholderParentStorageClass)
        self.assertIsNotNone(component.parentStorageClass)

        with self.assertRaises(ValueError):
            component.finalizeParentStorageClass("parent")

        component.finalizeParentStorageClass(storageParent)
        self.assertEqual(component.parentStorageClass, storageParent)

        component = DatasetType("a.b", dimensions, storageComp,
                                parentStorageClass=storageParent)

        with self.assertRaises(ValueError):
            # Can not replace unless a placeholder
            component.finalizeParentStorageClass(storageComp)

        datasetType = DatasetType("a", dimensions, storageParent)
        with self.assertRaises(ValueError):
            # Can not add parent if not component
            datasetType.finalizeParentStorageClass(storageComp)

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument", ), ("skymap", )]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # Also check that hashes of instances constructed with a StorageClass
        # name match hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(hash(DatasetType("a", dimensions, storageC)),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertEqual(hash(DatasetType("a", dimensions, "test_c")),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageC)),
                            hash(DatasetType("a", dimensions, "test_d")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageD)),
                            hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, "test_c")),
                            hash(DatasetType("a", dimensions, "test_d")))

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self):
        """Test pickle support.
        """
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name,
                         storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, "StrangeComponent",
                                           parentStorageClass="UnknownParent")
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, factoryComponentStorageClassClass(),
                                           parentStorageClass=factoryStorageClassClass())
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass("test_composite", components={"compA": storageClassA,
                                                                  "compB": storageClassB})
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)


class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef.
    """

    def setUp(self):
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass("Parent", components={"a": self.componentStorageClass1,
                                                                     "b": self.componentStorageClass2})
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values.
        """
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as
        # everything else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=42))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=44))

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3])

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=42), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=41), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=44), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self):
        """Test that a resolved ref can be unresolved and resolved again."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        """Test pickle support for DatasetRef."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)


if __name__ == "__main__":
    unittest.main()