Coverage for tests/test_datasets.py: 10%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import copy
23import pickle
24import unittest
26from lsst.daf.butler import (
27 DataCoordinate,
28 DatasetRef,
29 DatasetType,
30 DimensionUniverse,
31 StorageClass,
32 StorageClassFactory,
33)
35"""Tests for datasets module.
36"""
class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType."""

    def setUp(self) -> None:
        # Fresh default dimension universe for every test.
        self.universe = DimensionUniverse()

    def testConstructor(self) -> None:
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        # A component dataset type name (parent.comp) requires a parent
        # storage class, and a non-component name must not be given one.
        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self) -> None:
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        # Name-based construction below needs the class registered with
        # the (singleton) factory so the string can be resolved.
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self) -> None:
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.extract(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b", "_a")
        badNames = ("1", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass(
            "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
        )

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                # Component dataset types record the composite's storage
                # class as their parent.
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            # A bad component suffix must be rejected even when the parent
            # name is valid.
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self) -> None:
        """Check DatasetType equality across name, storage class (given as
        an instance or by name), dimensions, parent storage class, and the
        calibration flag.
        """
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.extract(["instrument"])
        dimensionsB = self.universe.extract(["skymap"])
        # Identical construction compares equal.
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        # Storage class by name vs. instance must compare equal, in either
        # order and name-vs-name.
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        # Components with matching parent storage class (instance or name)
        # compare equal.
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
        )
        # The calibration flag participates in equality.
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=True),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=False),
        )
        # Differing name, storage class, or dimensions each break equality.
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageB,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_b",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                "test_a",
            ),
        )
        # Differing parent storage class (instance or name) breaks equality.
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
        )

    def testCompatibility(self) -> None:
        """Check is_compatible_with: compatibility is directional and is
        driven by registered storage-class converters.
        """
        # storageA declares it can be converted from list (test_b's pytype).
        storageA = StorageClass("test_a", pytype=set, converters={"list": "builtins.set"})
        storageB = StorageClass("test_b", pytype=list)
        storageC = StorageClass("test_c", pytype=dict)
        self.assertTrue(storageA.can_convert(storageB))
        dimensionsA = self.universe.extract(["instrument"])

        dA = DatasetType("a", dimensionsA, storageA)
        dA2 = DatasetType("a", dimensionsA, storageB)
        self.assertNotEqual(dA, dA2)
        self.assertTrue(dA.is_compatible_with(dA))
        # Compatible one way (set can be built from list) but not the other.
        self.assertTrue(dA.is_compatible_with(dA2))
        self.assertFalse(dA2.is_compatible_with(dA))

        # No converter between dict and set: incompatible.
        dA3 = DatasetType("a", dimensionsA, storageC)
        self.assertFalse(dA.is_compatible_with(dA3))

    def testJson(self) -> None:
        """Check that to_json/from_json round-trips a DatasetType, including
        a component with a parent storage class name.
        """
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.extract(["instrument"])
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType.from_json(
                DatasetType(
                    "a",
                    dimensionsA,
                    storageA,
                ).to_json(),
                self.universe,
            ),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType.from_json(
                DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent").to_json(),
                self.universe,
            ),
        )

    def testSorting(self) -> None:
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        # Sorting is expected to order by dataset type name.
        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])

    def testHashability(self) -> None:
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        # Every (name, storageClass, dimensions) combination yields one
        # unique entry; the copy of each must hash identically.
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dimensions in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.extract(dimensions), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # also check that hashes of instances constructed with StorageClass
        # name matches hashes of instances constructed with instances
        dimensions = self.universe.extract(["instrument"])
        self.assertEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_d"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageD)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_d"))
        )

    def testDeepCopy(self) -> None:
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self) -> None:
        """Test pickle support."""
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.extract(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        # The calibration flag must survive the round trip as well.
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            "StrangeComponent",
            parentStorageClass="UnknownParent",
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        # Compare the private unresolved-name attribute directly so the
        # check itself doesn't trigger storage-class resolution.
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

        # Now with a storage class that is created by the factory
        factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            factoryComponentStorageClassClass(),
            parentStorageClass=factoryStorageClassClass(),
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

    def test_composites(self) -> None:
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass(
            "test_composite", components={"compA": storageClassA, "compB": storageClassB}
        )
        # Only the container storage class counts as composite.
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.extract(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        # isComposite/isComponent are mutually exclusive views.
        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        # Component names use the "parent.component" convention.
        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        # Components report the composite's storage class as parent; the
        # composite itself has no parent.
        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)
class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef."""

    def setUp(self) -> None:
        # Shared fixtures: a composite parent storage class with two
        # components, a (instrument, visit) dataset type, and a data ID
        # that provides both required dimensions.
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass(
            "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
        )
        dimensions = self.universe.extract(("instrument", "visit"))
        self.dataId = dict(instrument="DummyCam", visit=42)
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def testConstructor(self) -> None:
        """Test that construction preserves and validates values."""
        # Construct an unresolved ref.
        ref = DatasetRef(self.datasetType, self.dataId)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        # Constructing an unresolved ref with run and/or components should
        # fail.
        run = "somerun"
        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run)
        # Passing a data ID that is missing dimensions should fail.
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, {"instrument": "DummyCam"})
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, 1)
        self.assertEqual(ref.run, run)

    def testSorting(self) -> None:
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])

    def testResolving(self) -> None:
        """Check round-tripping a ref through unresolved() and resolved():
        id/run are dropped and restored, dataset type and data ID survive.
        """
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        unresolvedRef = ref.unresolved()
        self.assertIsNotNone(ref.id)
        self.assertIsNone(unresolvedRef.id)
        self.assertIsNone(unresolvedRef.run)
        # Resolution state participates in equality.
        self.assertNotEqual(ref, unresolvedRef)
        self.assertEqual(ref.unresolved(), unresolvedRef)
        self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
        self.assertEqual(ref.dataId, unresolvedRef.dataId)
        reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
        self.assertEqual(ref, reresolvedRef)
        self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
        self.assertIsNotNone(reresolvedRef.run)

    def testPickle(self) -> None:
        """A resolved ref must survive a pickle round trip."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self) -> None:
        """A resolved ref must survive a JSON round trip."""
        ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)
# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()