Coverage for tests/test_datasets.py: 10%
264 statements
coverage.py v6.4, created at 2022-05-24 02:27 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import copy
23import pickle
24import unittest
26from lsst.daf.butler import (
27 DataCoordinate,
28 DatasetRef,
29 DatasetType,
30 DimensionUniverse,
31 StorageClass,
32 StorageClassFactory,
33)
35"""Tests for datasets module.
36"""
39class DatasetTypeTestCase(unittest.TestCase):
40 """Test for DatasetType."""
42 def setUp(self):
43 self.universe = DimensionUniverse()
45 def testConstructor(self):
46 """Test construction preserves values.
48 Note that construction doesn't check for valid storageClass.
49 This can only be verified for a particular schema.
50 """
51 datasetTypeName = "test"
52 storageClass = StorageClass("test_StructuredData")
53 dimensions = self.universe.extract(("visit", "instrument"))
54 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
55 self.assertEqual(datasetType.name, datasetTypeName)
56 self.assertEqual(datasetType.storageClass, storageClass)
57 self.assertEqual(datasetType.dimensions, dimensions)
59 with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
60 DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
61 with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
62 DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")
64 def testConstructor2(self):
65 """Test construction from StorageClass name."""
66 datasetTypeName = "test"
67 storageClass = StorageClass("test_constructor2")
68 StorageClassFactory().registerStorageClass(storageClass)
69 dimensions = self.universe.extract(("instrument", "visit"))
70 datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
71 self.assertEqual(datasetType.name, datasetTypeName)
72 self.assertEqual(datasetType.storageClass, storageClass)
73 self.assertEqual(datasetType.dimensions, dimensions)
75 def testNameValidation(self):
76 """Test that dataset type names only contain certain characters
77 in certain positions.
78 """
79 dimensions = self.universe.extract(("instrument", "visit"))
80 goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b", "_a")
81 badNames = ("1", "a%b", "B+Z", "T[0]")
83 # Construct storage class with all the good names included as
84 # components so that we can test internal consistency
85 storageClass = StorageClass(
86 "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
87 )
89 for name in goodNames:
90 composite = DatasetType(name, dimensions, storageClass)
91 self.assertEqual(composite.name, name)
92 for suffix in goodNames:
93 full = DatasetType.nameWithComponent(name, suffix)
94 component = composite.makeComponentDatasetType(suffix)
95 self.assertEqual(component.name, full)
96 self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
97 for suffix in badNames:
98 full = DatasetType.nameWithComponent(name, suffix)
99 with self.subTest(full=full):
100 with self.assertRaises(ValueError):
101 DatasetType(full, dimensions, storageClass)
102 for name in badNames:
103 with self.subTest(name=name):
104 with self.assertRaises(ValueError):
105 DatasetType(name, dimensions, storageClass)
107 def testEquality(self):
108 storageA = StorageClass("test_a")
109 storageB = StorageClass("test_b")
110 parent = StorageClass("test")
111 dimensionsA = self.universe.extract(["instrument"])
112 dimensionsB = self.universe.extract(["skymap"])
113 self.assertEqual(
114 DatasetType(
115 "a",
116 dimensionsA,
117 storageA,
118 ),
119 DatasetType(
120 "a",
121 dimensionsA,
122 storageA,
123 ),
124 )
125 self.assertEqual(
126 DatasetType(
127 "a",
128 dimensionsA,
129 "test_a",
130 ),
131 DatasetType(
132 "a",
133 dimensionsA,
134 storageA,
135 ),
136 )
137 self.assertEqual(
138 DatasetType(
139 "a",
140 dimensionsA,
141 storageA,
142 ),
143 DatasetType(
144 "a",
145 dimensionsA,
146 "test_a",
147 ),
148 )
149 self.assertEqual(
150 DatasetType(
151 "a",
152 dimensionsA,
153 "test_a",
154 ),
155 DatasetType(
156 "a",
157 dimensionsA,
158 "test_a",
159 ),
160 )
161 self.assertEqual(
162 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
163 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
164 )
165 self.assertEqual(
166 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
167 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
168 )
169 self.assertNotEqual(
170 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=True),
171 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=False),
172 )
173 self.assertNotEqual(
174 DatasetType(
175 "a",
176 dimensionsA,
177 storageA,
178 ),
179 DatasetType(
180 "b",
181 dimensionsA,
182 storageA,
183 ),
184 )
185 self.assertNotEqual(
186 DatasetType(
187 "a",
188 dimensionsA,
189 storageA,
190 ),
191 DatasetType(
192 "b",
193 dimensionsA,
194 "test_a",
195 ),
196 )
197 self.assertNotEqual(
198 DatasetType(
199 "a",
200 dimensionsA,
201 storageA,
202 ),
203 DatasetType(
204 "a",
205 dimensionsA,
206 storageB,
207 ),
208 )
209 self.assertNotEqual(
210 DatasetType(
211 "a",
212 dimensionsA,
213 storageA,
214 ),
215 DatasetType(
216 "a",
217 dimensionsA,
218 "test_b",
219 ),
220 )
221 self.assertNotEqual(
222 DatasetType(
223 "a",
224 dimensionsA,
225 storageA,
226 ),
227 DatasetType(
228 "a",
229 dimensionsB,
230 storageA,
231 ),
232 )
233 self.assertNotEqual(
234 DatasetType(
235 "a",
236 dimensionsA,
237 storageA,
238 ),
239 DatasetType(
240 "a",
241 dimensionsB,
242 "test_a",
243 ),
244 )
245 self.assertNotEqual(
246 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
247 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
248 )
249 self.assertNotEqual(
250 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
251 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
252 )
254 def testCompatibility(self):
255 storageA = StorageClass("test_a", pytype=set, converters={"list": "builtins.set"})
256 storageB = StorageClass("test_b", pytype=list)
257 storageC = StorageClass("test_c", pytype=dict)
258 self.assertTrue(storageA.can_convert(storageB))
259 dimensionsA = self.universe.extract(["instrument"])
261 dA = DatasetType("a", dimensionsA, storageA)
262 dA2 = DatasetType("a", dimensionsA, storageB)
263 self.assertNotEqual(dA, dA2)
264 self.assertTrue(dA.is_compatible_with(dA))
265 self.assertTrue(dA.is_compatible_with(dA2))
266 self.assertFalse(dA2.is_compatible_with(dA))
268 dA3 = DatasetType("a", dimensionsA, storageC)
269 self.assertFalse(dA.is_compatible_with(dA3))
271 def testJson(self):
272 storageA = StorageClass("test_a")
273 dimensionsA = self.universe.extract(["instrument"])
274 self.assertEqual(
275 DatasetType(
276 "a",
277 dimensionsA,
278 storageA,
279 ),
280 DatasetType.from_json(
281 DatasetType(
282 "a",
283 dimensionsA,
284 storageA,
285 ).to_json(),
286 self.universe,
287 ),
288 )
289 self.assertEqual(
290 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
291 DatasetType.from_json(
292 DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent").to_json(),
293 self.universe,
294 ),
295 )
297 def testSorting(self):
298 """Can we sort a DatasetType"""
299 storage = StorageClass("test_a")
300 dimensions = self.universe.extract(["instrument"])
302 d_a = DatasetType("a", dimensions, storage)
303 d_f = DatasetType("f", dimensions, storage)
304 d_p = DatasetType("p", dimensions, storage)
306 sort = sorted([d_p, d_f, d_a])
307 self.assertEqual(sort, [d_a, d_f, d_p])
309 # Now with strings
310 with self.assertRaises(TypeError):
311 sort = sorted(["z", d_p, "c", d_f, d_a, "d"])
313 def testHashability(self):
314 """Test `DatasetType.__hash__`.
316 This test is performed by checking that `DatasetType` entries can
317 be inserted into a `set` and that unique values of its
318 (`name`, `storageClass`, `dimensions`) parameters result in separate
319 entries (and equal ones don't).
321 This does not check for uniformity of hashing or the actual values
322 of the hash function.
323 """
324 types = []
325 unique = 0
326 storageC = StorageClass("test_c")
327 storageD = StorageClass("test_d")
328 for name in ["a", "b"]:
329 for storageClass in [storageC, storageD]:
330 for dimensions in [("instrument",), ("skymap",)]:
331 datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
332 datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
333 types.extend((datasetType, datasetTypeCopy))
334 unique += 1 # datasetType should always equal its copy
335 self.assertEqual(len(set(types)), unique) # all other combinations are unique
337 # also check that hashes of instances constructed with StorageClass
338 # name matches hashes of instances constructed with instances
339 dimensions = self.universe.extract(["instrument"])
340 self.assertEqual(
341 hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c"))
342 )
343 self.assertEqual(
344 hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_c"))
345 )
346 self.assertNotEqual(
347 hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_d"))
348 )
349 self.assertNotEqual(
350 hash(DatasetType("a", dimensions, storageD)), hash(DatasetType("a", dimensions, "test_c"))
351 )
352 self.assertNotEqual(
353 hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_d"))
354 )
356 def testDeepCopy(self):
357 """Test that we can copy a dataset type."""
358 storageClass = StorageClass("test_copy")
359 datasetTypeName = "test"
360 dimensions = self.universe.extract(("instrument", "visit"))
361 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
362 dcopy = copy.deepcopy(datasetType)
363 self.assertEqual(dcopy, datasetType)
365 # Now with calibration flag set
366 datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
367 dcopy = copy.deepcopy(datasetType)
368 self.assertEqual(dcopy, datasetType)
369 self.assertTrue(dcopy.isCalibration())
371 # And again with a composite
372 componentStorageClass = StorageClass("copy_component")
373 componentDatasetType = DatasetType(
374 DatasetType.nameWithComponent(datasetTypeName, "comp"),
375 dimensions,
376 componentStorageClass,
377 parentStorageClass=storageClass,
378 )
379 dcopy = copy.deepcopy(componentDatasetType)
380 self.assertEqual(dcopy, componentDatasetType)
382 def testPickle(self):
383 """Test pickle support."""
384 storageClass = StorageClass("test_pickle")
385 datasetTypeName = "test"
386 dimensions = self.universe.extract(("instrument", "visit"))
387 # Un-pickling requires that storage class is registered with factory.
388 StorageClassFactory().registerStorageClass(storageClass)
389 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
390 datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
391 self.assertIsInstance(datasetTypeOut, DatasetType)
392 self.assertEqual(datasetType.name, datasetTypeOut.name)
393 self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
394 self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
395 self.assertIsNone(datasetTypeOut.parentStorageClass)
396 self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
397 self.assertFalse(datasetTypeOut.isCalibration())
399 datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
400 datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
401 self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
402 self.assertTrue(datasetTypeOut.isCalibration())
404 # And again with a composite
405 componentStorageClass = StorageClass("pickle_component")
406 StorageClassFactory().registerStorageClass(componentStorageClass)
407 componentDatasetType = DatasetType(
408 DatasetType.nameWithComponent(datasetTypeName, "comp"),
409 dimensions,
410 componentStorageClass,
411 parentStorageClass=storageClass,
412 )
413 datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
414 self.assertIsInstance(datasetTypeOut, DatasetType)
415 self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
416 self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
417 self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
418 self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
419 self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
420 self.assertEqual(datasetTypeOut, componentDatasetType)
422 # Now with a string and not a real storage class to test that
423 # pickling doesn't force the StorageClass to be resolved
424 componentDatasetType = DatasetType(
425 DatasetType.nameWithComponent(datasetTypeName, "comp"),
426 dimensions,
427 "StrangeComponent",
428 parentStorageClass="UnknownParent",
429 )
430 datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
431 self.assertEqual(datasetTypeOut, componentDatasetType)
432 self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)
434 # Now with a storage class that is created by the factory
435 factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
436 factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
437 componentDatasetType = DatasetType(
438 DatasetType.nameWithComponent(datasetTypeName, "comp"),
439 dimensions,
440 factoryComponentStorageClassClass(),
441 parentStorageClass=factoryStorageClassClass(),
442 )
443 datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
444 self.assertEqual(datasetTypeOut, componentDatasetType)
445 self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)
447 def test_composites(self):
448 """Test components within composite DatasetTypes."""
449 storageClassA = StorageClass("compA")
450 storageClassB = StorageClass("compB")
451 storageClass = StorageClass(
452 "test_composite", components={"compA": storageClassA, "compB": storageClassB}
453 )
454 self.assertTrue(storageClass.isComposite())
455 self.assertFalse(storageClassA.isComposite())
456 self.assertFalse(storageClassB.isComposite())
458 dimensions = self.universe.extract(("instrument", "visit"))
460 datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
461 datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
462 datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")
464 self.assertTrue(datasetTypeComposite.isComposite())
465 self.assertFalse(datasetTypeComponentA.isComposite())
466 self.assertTrue(datasetTypeComponentB.isComponent())
467 self.assertFalse(datasetTypeComposite.isComponent())
469 self.assertEqual(datasetTypeComposite.name, "composite")
470 self.assertEqual(datasetTypeComponentA.name, "composite.compA")
471 self.assertEqual(datasetTypeComponentB.component(), "compB")
472 self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
473 self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))
475 self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
476 self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
477 self.assertIsNone(datasetTypeComposite.parentStorageClass)
479 with self.assertRaises(KeyError):
480 datasetTypeComposite.makeComponentDatasetType("compF")
483class DatasetRefTestCase(unittest.TestCase):
484 """Test for DatasetRef."""
486 def setUp(self):
487 self.universe = DimensionUniverse()
488 datasetTypeName = "test"
489 self.componentStorageClass1 = StorageClass("Component1")
490 self.componentStorageClass2 = StorageClass("Component2")
491 self.parentStorageClass = StorageClass(
492 "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
493 )
494 dimensions = self.universe.extract(("instrument", "visit"))
495 self.dataId = dict(instrument="DummyCam", visit=42)
496 self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)
498 def testConstructor(self):
499 """Test that construction preserves and validates values."""
500 # Construct an unresolved ref.
501 ref = DatasetRef(self.datasetType, self.dataId)
502 self.assertEqual(ref.datasetType, self.datasetType)
503 self.assertEqual(
504 ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
505 )
506 self.assertIsInstance(ref.dataId, DataCoordinate)
507 # Constructing an unresolved ref with run and/or components should
508 # fail.
509 run = "somerun"
510 with self.assertRaises(ValueError):
511 DatasetRef(self.datasetType, self.dataId, run=run)
512 # Passing a data ID that is missing dimensions should fail.
513 with self.assertRaises(KeyError):
514 DatasetRef(self.datasetType, {"instrument": "DummyCam"})
515 # Constructing a resolved ref should preserve run as well as everything
516 # else.
517 ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
518 self.assertEqual(ref.datasetType, self.datasetType)
519 self.assertEqual(
520 ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
521 )
522 self.assertIsInstance(ref.dataId, DataCoordinate)
523 self.assertEqual(ref.id, 1)
524 self.assertEqual(ref.run, run)
526 def testSorting(self):
527 """Can we sort a DatasetRef"""
528 ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
529 ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
530 ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))
532 # Enable detailed diff report
533 self.maxDiff = None
535 # This will sort them on visit number
536 sort = sorted([ref3, ref1, ref2])
537 self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")
539 # Now include a run
540 ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
541 self.assertEqual(ref1.run, "b")
542 ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
543 ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
544 ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)
546 # This will sort them on run before visit
547 sort = sorted([ref3, ref1, ref2, ref4])
548 self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")
550 # Now with strings
551 with self.assertRaises(TypeError):
552 sort = sorted(["z", ref1, "c"])
554 def testResolving(self):
555 ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
556 unresolvedRef = ref.unresolved()
557 self.assertIsNotNone(ref.id)
558 self.assertIsNone(unresolvedRef.id)
559 self.assertIsNone(unresolvedRef.run)
560 self.assertNotEqual(ref, unresolvedRef)
561 self.assertEqual(ref.unresolved(), unresolvedRef)
562 self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
563 self.assertEqual(ref.dataId, unresolvedRef.dataId)
564 reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
565 self.assertEqual(ref, reresolvedRef)
566 self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
567 self.assertIsNotNone(reresolvedRef.run)
569 def testPickle(self):
570 ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
571 s = pickle.dumps(ref)
572 self.assertEqual(pickle.loads(s), ref)
574 def testJson(self):
575 ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
576 s = ref.to_json()
577 self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)
580if __name__ == "__main__": 580 ↛ 581line 580 didn't jump to line 581, because the condition on line 580 was never true
581 unittest.main()