Coverage for tests/test_datasets.py: 10%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import unittest
import pickle
import copy

from lsst.daf.butler import (
    DataCoordinate,
    DatasetType,
    DatasetRef,
    DimensionUniverse,
    StorageClass,
    StorageClassFactory,
)
35"""Tests for datasets module.
36"""
39class DatasetTypeTestCase(unittest.TestCase):
40 """Test for DatasetType.
41 """
42 def setUp(self):
43 self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)
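        # A component dataset type (a name of the form "parent.comp") is
        # expected to require a parent storage class, while a plain dataset
        # type must not be given one; the checks below exercise both error
        # paths.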
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                        dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName,
                        dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")
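        # The good/bad examples above imply the rule under test: a dataset
        # type name (or component suffix) must start with a letter and
        # otherwise contain only letters, digits, and underscores.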
        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass("test_StructuredData",
                                    components={n: StorageClass("component") for n in goodNames})

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
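        # Equality is expected to compare the dataset type name, dimensions,
        # and storage class (a storage class given by name should compare
        # equal to an instance of the same name), plus the parent storage
        # class for component types; the cases below exercise each of these.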
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, storageA,))
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a", dimensionsA, "test_a",),
                         DatasetType("a", dimensionsA, "test_a",))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("b", dimensionsA, "test_a",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, storageB,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsA, "test_b",))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, storageA,))
        self.assertNotEqual(DatasetType("a", dimensionsA, storageA,),
                            DatasetType("a", dimensionsB, "test_a",))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB))
        self.assertNotEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
                            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"))

    def testJson(self):
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(DatasetType("a", dimensionsA, storageA,),
                         DatasetType.from_json(DatasetType("a", dimensionsA, storageA,).to_json(),
                                               self.universe))
        self.assertEqual(DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
                         DatasetType.from_json(DatasetType("a.b", dimensionsA, "test_b",
                                                           parentStorageClass="parent").to_json(),
                                               self.universe))

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)
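        # The expected order below implies that DatasetTypes sort by dataset
        # type name.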
        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testParentPlaceholder(self):
        """Test that a parent placeholder can be replaced."""
        storageComp = StorageClass("component")
        storageParent = StorageClass("Parent")
        dimensions = self.universe.extract(["instrument"])
        component = DatasetType("a.b", dimensions, storageComp,
                                parentStorageClass=DatasetType.PlaceholderParentStorageClass)
        self.assertIsNotNone(component.parentStorageClass)

        with self.assertRaises(ValueError):
            component.finalizeParentStorageClass("parent")

        component.finalizeParentStorageClass(storageParent)
        self.assertEqual(component.parentStorageClass, storageParent)

        component = DatasetType("a.b", dimensions, storageComp,
                                parentStorageClass=storageParent)

        with self.assertRaises(ValueError):
            # Cannot replace the parent unless it is a placeholder
            component.finalizeParentStorageClass(storageComp)

        datasetType = DatasetType("a", dimensions, storageParent)
        with self.assertRaises(ValueError):
            # Cannot add a parent storage class if not a component
            datasetType.finalizeParentStorageClass(storageComp)

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
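        # 2 names x 2 storage classes x 2 dimension sets gives 8 distinct
        # combinations; each is constructed twice, so the set built below
        # should collapse the duplicates and keep exactly 8 entries.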
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument", ), ("skymap", )]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # Also check that hashes of instances constructed with a StorageClass
        # name match hashes of instances constructed with instances.
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(hash(DatasetType("a", dimensions, storageC)),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertEqual(hash(DatasetType("a", dimensions, "test_c")),
                         hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageC)),
                            hash(DatasetType("a", dimensions, "test_d")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, storageD)),
                            hash(DatasetType("a", dimensions, "test_c")))
        self.assertNotEqual(hash(DatasetType("a", dimensions, "test_c")),
                            hash(DatasetType("a", dimensions, "test_d")))

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self):
        """Test pickle support.
        """
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that the storage class is registered with the
        # factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, componentStorageClass,
                                           parentStorageClass=storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name,
                         storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, "StrangeComponent",
                                           parentStorageClass="UnknownParent")
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                           dimensions, factoryComponentStorageClassClass(),
                                           parentStorageClass=factoryStorageClassClass())
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName,
                         componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass("test_composite", components={"compA": storageClassA,
                                                                  "compB": storageClassB})
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))
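        # makeComponentDatasetType is expected to derive component names of
        # the form "<parent>.<component>" and to record the composite's
        # storage class as the component's parent storage class, as the
        # assertions below check.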
        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)


class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef.
    """

    def setUp(self):
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass("Parent", components={"a": self.componentStorageClass1,
                                                                     "b": self.componentStorageClass2})
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values.
        """
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as
        # everything else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                         msg=ref.dataId)
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self):
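        # A resolved ref carries an id and a run; unresolved() is expected to
        # drop both while keeping the dataset type and data ID, and
        # resolved(id=..., run=...) to restore them, as checked below.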
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self):
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)


if __name__ == "__main__":
    unittest.main()