Coverage for tests/test_datasets.py: 11%

Shortcuts on this page:
r m x p    toggle line displays
j k        next/prev highlighted chunk
0 (zero)   top of page
1 (one)    first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import copy
23import pickle
24import unittest
26from lsst.daf.butler import (
27 DataCoordinate,
28 DatasetRef,
29 DatasetType,
30 DimensionUniverse,
31 StorageClass,
32 StorageClassFactory,
33)
# NOTE(review): this string sits after the imports, so it is an ordinary
# expression statement rather than the module docstring (module __doc__ stays
# None) — consider moving it above the import block.
"""Tests for datasets module.
"""
class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType.

    Exercises construction, name validation, equality, hashing, sorting,
    JSON round-tripping, copying, pickling, and composite/component
    handling of `DatasetType`.
    """

    def setUp(self):
        # A fresh default dimension universe; used by every test to build
        # DimensionGraphs via self.universe.extract(...).
        self.universe = DimensionUniverse()

    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # The constructor arguments must round-trip unchanged through the
        # public accessors.
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component dataset type name ("test.comp") requires a parent
        # storage class, and a plain name must not be given one.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self):
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Register so the name can be resolved by DatasetType below;
        # presumably StorageClassFactory shares a global registry — the
        # throwaway instance here implies singleton behavior (TODO confirm).
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        # Pass the storage class by *name* rather than instance.
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        # The resolved storage class must compare equal to the registered one.
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self):
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        # Valid names: alphanumeric + underscore, not starting with a digit.
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
        # Invalid: leading digit, bare underscore, punctuation.
        badNames = ("1", "_", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass(
            "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
        )

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            # Every good name is also acceptable as a component suffix.
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                # Components derived from a composite inherit its storage
                # class as their parent.
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            # A bad suffix invalidates the whole "name.suffix" form.
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self):
        """Test `DatasetType.__eq__`, including mixed name/instance
        storage-class arguments and component parent storage classes.
        """
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        # Identical construction -> equal.
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        # Storage class given by name on one side, instance on the other:
        # still equal (both orders).
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        # Component types with matching parent storage class (instance or
        # name form) compare equal.
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
        )
        # Differing name -> not equal.
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                "test_a",
            ),
        )
        # Differing storage class -> not equal.
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageB,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_b",
            ),
        )
        # Differing dimensions -> not equal.
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                "test_a",
            ),
        )
        # Differing parent storage class (instance or name) -> not equal.
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
        )

    def testJson(self):
        """Test that to_json/from_json round-trips a DatasetType,
        including a component type with a parent storage class name.
        """
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType.from_json(
                DatasetType(
                    "a",
                    dimensionsA,
                    storageA,
                ).to_json(),
                # Deserialization needs the universe to rebuild dimensions.
                self.universe,
            ),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType.from_json(
                DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent").to_json(),
                self.universe,
            ),
        )

    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        # Sorting is by name (a < f < p).
        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        # Mixing plain strings with DatasetTypes must not silently sort.
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        # 2 names x 2 storage classes x 2 dimension sets = 8 unique combos,
        # each inserted twice (original + equal copy).
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_d"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageD)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_d"))
        )

    def testDeepCopy(self):
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        # The calibration flag must survive the copy.
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self):
        """Test pickle support."""
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        # A non-component type has no parent storage class after the
        # round-trip, and the calibration flag defaults to False.
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            "StrangeComponent",
            parentStorageClass="UnknownParent",
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        # Compare the private name attribute directly so the unresolvable
        # class name is never looked up in the factory.
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            factoryComponentStorageClassClass(),
            parentStorageClass=factoryStorageClassClass(),
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

    def test_composites(self):
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass(
            "test_composite", components={"compA": storageClassA, "compB": storageClassB}
        )
        # Only the storage class with components is itself composite.
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        # Composite/component status is mutually exclusive.
        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        # Name decomposition: "composite.compA" splits into parent name and
        # component suffix.
        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        # Components carry the composite's storage class as parent; the
        # composite itself has no parent.
        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)
class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef.

    Covers construction/validation, sorting, resolving/unresolving, and
    pickle/JSON round-trips of `DatasetRef`.
    """

    def setUp(self):
        # Shared fixtures: a composite dataset type ("Parent" with
        # components "a" and "b") over (instrument, visit) and a matching
        # data ID used by most tests.
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass(
            "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
        )
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self):
        """Test that construction preserves and validates values."""
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        # The plain dict data ID is standardized to a DataCoordinate.
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        # Mixing plain strings with DatasetRefs must not silently sort.
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self):
        """Test unresolved()/resolved() round-trips and the equality
        relationship between resolved and unresolved refs.
        """
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        # unresolved() strips id and run, which makes the refs unequal...
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        # ...but dataset type and data ID are preserved.
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        # Re-resolving with the same id/run reproduces the original ref.
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self):
        """Test that a resolved DatasetRef round-trips through pickle."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self):
        """Test that a resolved DatasetRef round-trips through JSON."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = ref.to_json()
        # Deserialization needs the universe to rebuild the data ID.
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)
# Standard unittest entry point: discover and run the test cases above when
# this file is executed directly.
if __name__ == "__main__":
    unittest.main()