Coverage for tests/test_datasets.py: 8% (487 statements), coverage.py v7.13.5, created at 2026-04-26 08:49 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import copy
import os
import pickle
import unittest
import uuid

from lsst.daf.butler import (
    DataCoordinate,
    DatasetProvenance,
    DatasetRef,
    DatasetType,
    DimensionConfig,
    DimensionUniverse,
    FileDataset,
    SerializedDatasetRefContainerV1,
    StorageClass,
    StorageClassFactory,
    UnknownComponentError,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ZipIndex
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.resources import ResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))

"""Tests for datasets module.
"""


class DatasetTypeTestCase(unittest.TestCase):
    """Test for DatasetType."""

    def setUp(self) -> None:
        self.universe = DimensionUniverse()

    def testConstructor(self) -> None:
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.conform(("visit", "instrument"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        with self.assertRaises(ValueError, msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"), dimensions, storageClass)
        with self.assertRaises(ValueError, msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName, dimensions, storageClass, parentStorageClass="NotAllowed")

    def testConstructor2(self) -> None:
        """Test construction from StorageClass name."""
        datasetTypeName = "test"
        storageClass = StorageClass("test_constructor2")
        StorageClassFactory().registerStorageClass(storageClass)
        dimensions = self.universe.conform(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2")
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

    def testNameValidation(self) -> None:
        """Test that dataset type names only contain certain characters
        in certain positions.
        """
        dimensions = self.universe.conform(("instrument", "visit"))
        goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b", "_a")
        badNames = ("1", "a%b", "B+Z", "T[0]")

        # Construct storage class with all the good names included as
        # components so that we can test internal consistency
        storageClass = StorageClass(
            "test_StructuredData", components={n: StorageClass("component") for n in goodNames}
        )

        for name in goodNames:
            composite = DatasetType(name, dimensions, storageClass)
            self.assertEqual(composite.name, name)
            for suffix in goodNames:
                full = DatasetType.nameWithComponent(name, suffix)
                component = composite.makeComponentDatasetType(suffix)
                self.assertEqual(component.name, full)
                assert component.parentStorageClass is not None
                self.assertEqual(component.parentStorageClass.name, "test_StructuredData")
            for suffix in badNames:
                full = DatasetType.nameWithComponent(name, suffix)
                with self.subTest(full=full):
                    with self.assertRaises(ValueError):
                        DatasetType(full, dimensions, storageClass)
        for name in badNames:
            with self.subTest(name=name):
                with self.assertRaises(ValueError):
                    DatasetType(name, dimensions, storageClass)

    def testEquality(self) -> None:
        """Test the equality operator for DatasetType."""
        storageA = StorageClass("test_a")
        storageB = StorageClass("test_b")
        parent = StorageClass("test")
        dimensionsA = self.universe.conform(["instrument"])
        dimensionsB = self.universe.conform(["skymap"])
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=parent),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=True),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=False),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "b",
                dimensionsA,
                "test_a",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                storageB,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsA,
                "test_b",
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                storageA,
            ),
        )
        self.assertNotEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType(
                "a",
                dimensionsB,
                "test_a",
            ),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageA),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass=storageB),
        )
        self.assertNotEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageA"),
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
        )

    def testCompatibility(self) -> None:
        """Test storage class compatibility between DatasetTypes."""
        storageA = StorageClass("test_a", pytype=set, converters={"list": "builtins.set"})
        storageB = StorageClass("test_b", pytype=list)
        storageC = StorageClass("test_c", pytype=dict)
        self.assertTrue(storageA.can_convert(storageB))
        dimensionsA = self.universe.conform(["instrument"])

        dA = DatasetType("a", dimensionsA, storageA)
        dA2 = DatasetType("a", dimensionsA, storageB)
        self.assertNotEqual(dA, dA2)
        self.assertTrue(dA.is_compatible_with(dA))
        self.assertTrue(dA.is_compatible_with(dA2))
        self.assertFalse(dA2.is_compatible_with(dA))

        dA3 = DatasetType("a", dimensionsA, storageC)
        self.assertFalse(dA.is_compatible_with(dA3))

    def testOverrideStorageClass(self) -> None:
        """Test overriding the storage class of a DatasetType."""
        storageA = StorageClass("test_a", pytype=list, converters={"dict": "builtins.list"})
        storageB = StorageClass("test_b", pytype=dict, converters={"list": "dict"})
        dimensions = self.universe.conform(["instrument"])

        dA = DatasetType("a", dimensions, storageA)
        dB = dA.overrideStorageClass(storageB)
        self.assertNotEqual(dA, dB)
        self.assertEqual(dB.storageClass, storageB)

        round_trip = dB.overrideStorageClass(storageA)
        self.assertEqual(round_trip, dA)

        # Check that parents move over. Assign a pytype to avoid using
        # object in later tests.
        parent = StorageClass("composite", pytype=tuple, components={"a": storageA, "c": storageA})
        dP = DatasetType("comp", dimensions, parent)
        dP_A = dP.makeComponentDatasetType("a")
        dp_B = dP_A.overrideStorageClass(storageB)
        self.assertEqual(dp_B.storageClass, storageB)
        self.assertEqual(dp_B.parentStorageClass, parent)

        # Check that components are checked for compatibility but parents
        # can be different.
        parent2 = StorageClass(
            "composite2",
            pytype=frozenset,
            components={"a": storageB, "c": storageB},
        )
        dP2 = DatasetType("comp", dimensions, parent2)
        # Components are compatible even though parents aren't.
        self.assertFalse(dP.is_compatible_with(dP2))
        self.assertTrue(dP2.makeComponentDatasetType("a").is_compatible_with(dP_A))

    def testJson(self) -> None:
        storageA = StorageClass("test_a")
        dimensionsA = self.universe.conform(["instrument"])
        self.assertEqual(
            DatasetType(
                "a",
                dimensionsA,
                storageA,
            ),
            DatasetType.from_json(
                DatasetType(
                    "a",
                    dimensionsA,
                    storageA,
                ).to_json(),
                self.universe,
            ),
        )
        self.assertEqual(
            DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
            DatasetType.from_json(
                DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent").to_json(),
                self.universe,
            ),
        )

    def testSorting(self) -> None:
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.conform(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])  # type: ignore [list-item]

    def testHashability(self) -> None:
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dimensions`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types: list[DatasetType] = []
        unique = 0
        storageC = StorageClass("test_c")
        storageD = StorageClass("test_d")
        for name in ["a", "b"]:
            for storageClass in [storageC, storageD]:
                for dims in [("instrument",), ("skymap",)]:
                    datasetType = DatasetType(name, self.universe.conform(dims), storageClass)
                    datasetTypeCopy = DatasetType(name, self.universe.conform(dims), storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique

        # Also check that hashes of instances constructed with a StorageClass
        # name match hashes of instances constructed with instances.
        dimensions = self.universe.conform(["instrument"])
        self.assertEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_d"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, storageD)), hash(DatasetType("a", dimensions, "test_c"))
        )
        self.assertNotEqual(
            hash(DatasetType("a", dimensions, "test_c")), hash(DatasetType("a", dimensions, "test_d"))
        )

    def testDeepCopy(self) -> None:
        """Test that we can copy a dataset type."""
        storageClass = StorageClass("test_copy")
        datasetTypeName = "test"
        dimensions = self.universe.conform(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)

        # Now with calibration flag set
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        dcopy = copy.deepcopy(datasetType)
        self.assertEqual(dcopy, datasetType)
        self.assertTrue(dcopy.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("copy_component")
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        dcopy = copy.deepcopy(componentDatasetType)
        self.assertEqual(dcopy, componentDatasetType)

    def testPickle(self) -> None:
        """Test pickle support."""
        storageClass = StorageClass("test_pickle")
        datasetTypeName = "test"
        dimensions = self.universe.conform(("instrument", "visit"))
        # Un-pickling requires that storage class is registered with factory.
        StorageClassFactory().registerStorageClass(storageClass)
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(datasetType.name, datasetTypeOut.name)
        self.assertEqual(datasetType.dimensions, datasetTypeOut.dimensions)
        self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
        self.assertIsNone(datasetTypeOut.parentStorageClass)
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertFalse(datasetTypeOut.isCalibration())

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass, isCalibration=True)
        datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
        self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
        self.assertTrue(datasetTypeOut.isCalibration())

        # And again with a composite
        componentStorageClass = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentStorageClass)
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            componentStorageClass,
            parentStorageClass=storageClass,
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertIsInstance(datasetTypeOut, DatasetType)
        self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
        self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
        self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
        self.assertEqual(componentDatasetType.parentStorageClass, datasetTypeOut.parentStorageClass)
        self.assertEqual(datasetTypeOut.parentStorageClass.name, storageClass.name)
        self.assertEqual(datasetTypeOut, componentDatasetType)

        # Now with a string and not a real storage class to test that
        # pickling doesn't force the StorageClass to be resolved
        componentDatasetType = DatasetType(
            DatasetType.nameWithComponent(datasetTypeName, "comp"),
            dimensions,
            "StrangeComponent",
            parentStorageClass="UnknownParent",
        )
        datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
        self.assertEqual(datasetTypeOut, componentDatasetType)
        self.assertEqual(datasetTypeOut._parentStorageClassName, componentDatasetType._parentStorageClassName)

    def test_composites(self) -> None:
        """Test components within composite DatasetTypes."""
        storageClassA = StorageClass("compA")
        storageClassB = StorageClass("compB")
        storageClass = StorageClass(
            "test_composite", components={"compA": storageClassA, "compB": storageClassB}
        )
        self.assertTrue(storageClass.isComposite())
        self.assertFalse(storageClassA.isComposite())
        self.assertFalse(storageClassB.isComposite())

        dimensions = self.universe.conform(("instrument", "visit"))

        datasetTypeComposite = DatasetType("composite", dimensions, storageClass)
        datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA")
        datasetTypeComponentB = datasetTypeComposite.makeComponentDatasetType("compB")

        self.assertTrue(datasetTypeComposite.isComposite())
        self.assertFalse(datasetTypeComponentA.isComposite())
        self.assertTrue(datasetTypeComponentB.isComponent())
        self.assertFalse(datasetTypeComposite.isComponent())

        self.assertEqual(datasetTypeComposite.name, "composite")
        self.assertEqual(datasetTypeComponentA.name, "composite.compA")
        self.assertEqual(datasetTypeComponentB.component(), "compB")
        self.assertEqual(datasetTypeComposite.nameAndComponent(), ("composite", None))
        self.assertEqual(datasetTypeComponentA.nameAndComponent(), ("composite", "compA"))

        self.assertEqual(datasetTypeComponentA.parentStorageClass, storageClass)
        self.assertEqual(datasetTypeComponentB.parentStorageClass, storageClass)
        self.assertIsNone(datasetTypeComposite.parentStorageClass)

        with self.assertRaises(UnknownComponentError):
            datasetTypeComposite.makeComponentDatasetType("compF")

        with self.assertRaises(UnknownComponentError):
            datasetTypeComposite.componentTypeName("unknown")


class DatasetRefTestCase(unittest.TestCase):
    """Test for DatasetRef."""

    def setUp(self) -> None:
        self.universe = DimensionUniverse()
        datasetTypeName = "test"
        self.componentStorageClass1 = StorageClass("Component1")
        self.componentStorageClass2 = StorageClass("Component2")
        self.parentStorageClass = StorageClass(
            "Parent", components={"a": self.componentStorageClass1, "b": self.componentStorageClass2}
        )
        sc_factory = StorageClassFactory()
        sc_factory.registerStorageClass(self.componentStorageClass1)
        sc_factory.registerStorageClass(self.componentStorageClass2)
        sc_factory.registerStorageClass(self.parentStorageClass)
        dimensions = self.universe.conform(("instrument", "visit"))
        self.dataId = DataCoordinate.standardize(
            dict(instrument="DummyCam", visit=42), universe=self.universe
        )
        self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)

    def _make_datastore_records(self, ref: DatasetRef, *paths: str) -> DatasetRef:
        """Return an updated dataset ref with datastore records."""
        opaque_table_name = "datastore_records"
        datastore_records = {
            opaque_table_name: [
                StoredFileInfo(
                    formatter="",
                    path=path,
                    storageClass=ref.datasetType.storageClass,
                    component=None,
                    checksum=None,
                    file_size=1,
                )
                for path in paths
            ]
        }
        return ref.replace(datastore_records=datastore_records)

    def testConstructor(self) -> None:
        """Test that construction preserves and validates values."""
        # Constructing a ref requires a run.
        with self.assertRaises(TypeError):
            DatasetRef(self.datasetType, self.dataId, id=uuid.uuid4())  # type: ignore [call-arg]

        # Constructing an unresolved ref with run and/or components should
        # issue a ref with an id.
        run = "somerun"
        ref = DatasetRef(self.datasetType, self.dataId, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsNotNone(ref.id)

        # Passing a data ID that is missing dimensions should fail.
        # Create a full DataCoordinate to ensure that we are testing the
        # right thing.
        dimensions = self.universe.conform(("instrument",))
        dataId = DataCoordinate.standardize(instrument="DummyCam", dimensions=dimensions)
        with self.assertRaises(KeyError):
            DatasetRef(self.datasetType, dataId, run="run")
        # Constructing a resolved ref should preserve run as well as everything
        # else.
        id_ = uuid.uuid4()
        ref = DatasetRef(self.datasetType, self.dataId, id=id_, run=run)
        self.assertEqual(ref.datasetType, self.datasetType)
        self.assertEqual(
            ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe), msg=ref.dataId
        )
        self.assertIsInstance(ref.dataId, DataCoordinate)
        self.assertEqual(ref.id, id_)
        self.assertEqual(ref.run, run)

        with self.assertRaises(ValueError):
            DatasetRef(self.datasetType, self.dataId, run=run, id_generation_mode=42)  # type: ignore

    def testSorting(self) -> None:
        """Can we sort a DatasetRef"""
        # All refs have the same run.
        dimensions = self.universe.conform(("instrument", "visit"))
        ref1 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=1, dimensions=dimensions),
            run="run",
        )
        ref2 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=10, dimensions=dimensions),
            run="run",
        )
        ref3 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=22, dimensions=dimensions),
            run="run",
        )

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include different runs.
        ref1 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=43, dimensions=dimensions),
            run="b",
        )
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=10, dimensions=dimensions),
            run="b",
        )
        ref2 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=4, dimensions=dimensions),
            run="a",
        )
        ref3 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=104, dimensions=dimensions),
            run="c",
        )

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])  # type: ignore [list-item]

    def testOverrideStorageClass(self) -> None:
        """Test overriding the storage class of a DatasetRef."""
        storageA = StorageClass("test_a", pytype=list)

        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")

        ref_new = ref.overrideStorageClass(storageA)
        self.assertNotEqual(ref, ref_new)
        self.assertEqual(ref_new.datasetType.storageClass, storageA)
        self.assertEqual(ref_new.overrideStorageClass(ref.datasetType.storageClass), ref)
        self.assertTrue(ref.is_compatible_with(ref_new))
        with self.assertRaises(AttributeError):
            ref_new.is_compatible_with(None)  # type: ignore

        # Check different code paths of incompatibility.
        ref_incompat = DatasetRef(ref.datasetType, ref.dataId, run="somerun2", id=ref.id)
        self.assertFalse(ref.is_compatible_with(ref_incompat))  # bad run
        ref_incompat = DatasetRef(ref.datasetType, ref.dataId, run="somerun")
        self.assertFalse(ref.is_compatible_with(ref_incompat))  # bad ID

        incompatible_sc = StorageClass("my_int", pytype=int)
        with self.assertRaises(ValueError):
            # Do not test against "ref" because it has a default storage class
            # of "object" which is compatible with everything.
            ref_new.overrideStorageClass(incompatible_sc)

    def testReplace(self) -> None:
        """Test for `DatasetRef.replace` method."""
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")

        ref2 = ref.replace(run="somerun2")
        self.assertEqual(ref2.run, "somerun2")
        self.assertIsNotNone(ref2.id)
        self.assertNotEqual(ref2.id, ref.id)

        ref3 = ref.replace(run="somerun3", id=ref2.id)
        self.assertEqual(ref3.run, "somerun3")
        self.assertEqual(ref3.id, ref2.id)

        ref4 = ref.replace(id=ref2.id)
        self.assertEqual(ref4.run, "somerun")
        self.assertEqual(ref4.id, ref2.id)

        ref5 = ref.replace()
        self.assertEqual(ref5.run, "somerun")
        self.assertEqual(ref5, ref)

        self.assertIsNone(ref5._datastore_records)
        ref5 = ref5.replace(datastore_records={})
        self.assertEqual(ref5._datastore_records, {})
        ref5 = ref5.replace(datastore_records=None)
        self.assertIsNone(ref5._datastore_records)

    def testPickle(self) -> None:
        """Test that a DatasetRef round-trips through pickle."""
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        s = pickle.dumps(ref)
        self.assertEqual(pickle.loads(s), ref)

    def testJson(self) -> None:
        """Test that a DatasetRef round-trips through JSON."""
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        s = ref.to_json()
        self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)

        # Also test a ref with datastore records; serialization does not
        # preserve those.
        ref = self._make_datastore_records(ref, "/path1", "/path2")
        s = ref.to_json()
        ref2 = DatasetRef.from_json(s, universe=self.universe)
        self.assertEqual(ref2, ref)
        self.assertIsNone(ref2._datastore_records)

    def testFileDataset(self) -> None:
        """Test FileDataset construction and serialization."""
        ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
        file_dataset = FileDataset(path="something.yaml", refs=ref)
        self.assertEqual(file_dataset.refs, [ref])

        ref2 = DatasetRef(self.datasetType, self.dataId, run="somerun2")
        with self.assertRaises(ValueError):
            FileDataset(path="other.yaml", refs=[ref, ref2])

        serialized = file_dataset.to_simple()
        self.assertEqual(serialized.path, "something.yaml")
        self.assertEqual(len(serialized.refs), 1)
        serialized_ref = serialized.refs[ref.id]
        self.assertEqual(serialized_ref.run, ref.run)
        self.assertEqual(serialized_ref.dataset_type_name, self.datasetType.name)
        self.assertEqual(serialized_ref.data_id, dict(ref.dataId.mapping))

        def load_dataset_type(name: str) -> DatasetType:
            self.assertEqual(name, ref.datasetType.name)
            return self.datasetType

        deserialized = file_dataset.from_simple(
            serialized, universe=ref.dimensions.universe, dataset_type_loader=load_dataset_type
        )
        self.assertEqual(deserialized.formatter, file_dataset.formatter)
        self.assertEqual(deserialized.refs, file_dataset.refs)
        self.assertEqual(deserialized.path, file_dataset.path)

        file_dataset.formatter = "lsst.daf.butler.formatters.json.JsonFormatter"
        file_dataset_from_string_formatter = FileDataset.from_simple(
            file_dataset.to_simple(), dataset_type_loader=load_dataset_type, universe=ref.dimensions.universe
        )
        self.assertEqual(
            file_dataset_from_string_formatter.formatter, "lsst.daf.butler.formatters.json.JsonFormatter"
        )

        file_dataset.formatter = YamlFormatter
        file_dataset_from_class_formatter = FileDataset.from_simple(
            file_dataset.to_simple(), dataset_type_loader=load_dataset_type, universe=ref.dimensions.universe
        )
        self.assertEqual(
            file_dataset_from_class_formatter.formatter, "lsst.daf.butler.formatters.yaml.YamlFormatter"
        )

    def test_container(self) -> None:
        """Test round-tripping refs through SerializedDatasetRefContainerV1."""
        ref1 = DatasetRef(self.datasetType, self.dataId, run="somerun")
        ref2 = ref1.replace(run="somerun2")

        container = SerializedDatasetRefContainerV1.from_refs([ref1, ref2])
        self.assertEqual(len(container), 2)

        new_refs = container.to_refs(universe=self.universe)
        self.assertEqual(new_refs, [ref1, ref2])

    def test_dataset_provenance(self) -> None:
        """Test that dataset provenance can be stored."""
        dimensions = self.universe.conform(("instrument", "visit"))
        ref1 = DatasetRef(self.datasetType, self.dataId, run="somerun")
        ref2 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=10, dimensions=dimensions),
            run="run",
        )
        ref3 = DatasetRef(
            self.datasetType,
            DataCoordinate.standardize(instrument="DummyCam", visit=22, dimensions=dimensions),
            run="run",
        )

        quantum_id = uuid.uuid4()
        prov = DatasetProvenance(quantum_id=quantum_id)
        prov.add_input(ref2)
        prov.add_input(ref3)
        prov.add_input(ref2)  # no-op which should leave ref2 still ahead of ref3 in output.
        extra_id = uuid.uuid4()
        prov.add_extra_provenance(
            ref2.id, {"extra_string": "value", "extra_number": 42, "extra_id": extra_id}
        )

        with self.assertRaises(ValueError):
            prov.add_extra_provenance(ref2.id, {"extra_string": "value", "extra_number": 42, "id": extra_id})

        with self.assertRaises(ValueError):
            # Unknown dataset.
            prov.add_extra_provenance(ref1.id, {"extra": 42})

        expected = {
            "id": ref1.id,
            "datasettype": "test",
            "dataid.instrument": "DummyCam",
            "dataid.visit": 42,
            "run": "somerun",
            "quantum": quantum_id,
            "n_inputs": 2,
            "input.0.datasettype": "test",
            "input.0.run": "run",
            "input.0.id": ref2.id,
            "input.0.extra_number": 42,
            "input.0.extra_string": "value",
            "input.0.extra_id": extra_id,
            "input.1.datasettype": "test",
            "input.1.run": "run",
            "input.1.id": ref3.id,
        }

        prov_dict = prov.to_flat_dict(ref1, sep=".")
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        expected = {
            "id": ref1.id,
            "datasettype": "test",
            "dataid.instrument": "DummyCam",
            "dataid.visit": 42,
            "run": "somerun",
            "quantum": quantum_id,
            "n_inputs": 2,
        }

        prov_dict = prov.to_flat_dict(ref1, sep=".", max_inputs=1)
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        expected = {
            "id": ref1.id,
            "datasettype": "test",
            "dataid.instrument": "DummyCam",
            "dataid.visit": 42,
            "run": "somerun",
            "quantum": quantum_id,
            "n_inputs": 2,
            "input.0.id": ref2.id,
            "input.0.extra_number": 42,
            "input.0.extra_string": "value",
            "input.0.extra_id": extra_id,
            "input.1.id": ref3.id,
        }

        prov_dict = prov.to_flat_dict(ref1, sep=".", store_minimalist_inputs=True)
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        prov_dict = prov.to_flat_dict(ref1, prefix="", sep=".", simple_types=True)
        self.assertEqual(prov_dict["id"], str(ref1.id))
        self.assertEqual(prov_dict["quantum"], str(quantum_id))
        self.assertEqual(prov_dict["input.0.id"], str(ref2.id))
        self.assertEqual(prov_dict["input.0.extra_id"], str(extra_id))
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        for prefix, sep in (
            ("LSST BUTLER 🔭", " "),  # Unicode in prefix.
            ("LSST*BUTLER 🔭", " "),  # regex character.
            ("LSST*BUTLER", "+"),  # two regex characters.
            ("LSST_BUTLER", "\\"),  # backslash for extra difficulty.
            ("LSST BUTLER 🔭", "→"),  # Unicode separator.
        ):
            prov_dict = prov.to_flat_dict(ref1, prefix=prefix, sep=sep)
            self.assertIn(f"{prefix}{sep}RUN", prov_dict)
            self.assertIn(f"{prefix}{sep}INPUT{sep}0{sep}EXTRA_NUMBER", prov_dict)
            self.assertEqual(prov_dict[f"{prefix}{sep}RUN"], "somerun")
            self.assertEqual(prov_dict[f"{prefix}{sep}INPUT{sep}0{sep}EXTRA_NUMBER"], 42)
            DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
            self.assertEqual(prov_dict, {})

        # Prefix has no case so lower case assumed.
        prov_dict = prov.to_flat_dict(ref1, prefix="🔭 LSST BUTLER", sep="→")
        self.assertIn("🔭 LSST BUTLER→run", prov_dict)
        self.assertIn("🔭 LSST BUTLER→input→0→extra_number", prov_dict)
        self.assertEqual(prov_dict["🔭 LSST BUTLER→run"], "somerun")
        self.assertEqual(prov_dict["🔭 LSST BUTLER→input→0→extra_number"], 42)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        # Prefix has no case but force upper.
        prov_dict = prov.to_flat_dict(ref1, prefix="🔭 LSST BUTLER", sep="→", use_upper=True)
        self.assertIn("🔭 LSST BUTLER→RUN", prov_dict)
        self.assertIn("🔭 LSST BUTLER→INPUT→0→EXTRA_NUMBER", prov_dict)
        self.assertEqual(prov_dict["🔭 LSST BUTLER→RUN"], "somerun")
        self.assertEqual(prov_dict["🔭 LSST BUTLER→INPUT→0→EXTRA_NUMBER"], 42)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        prov_dict = prov.to_flat_dict(None, prefix="butler", sep=" ")
        self.assertNotIn("butler run", prov_dict)
        self.assertIn("butler quantum", prov_dict)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        # Check that an empty provenance returns empty dict with no ref.
        prov2 = DatasetProvenance()
        prov_dict = prov2.to_flat_dict(None)
        self.assertEqual(prov_dict, {})
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        # Check that an empty provenance with a ref returns info just for
        # that ref. Use separator that needs escaping in a regex.
        prov_dict = prov2.to_flat_dict(ref1, prefix="", sep="*")
        expected = {
            "id": ref1.id,
            "datasettype": "test",
            "dataid*instrument": "DummyCam",
            "dataid*visit": 42,
            "run": "somerun",
            "n_inputs": 0,
        }
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        # Test with empty provenance with ref that has no dataId.
        datasetType = DatasetType("empty", self.universe.empty, self.parentStorageClass)
        empty_ref = DatasetRef(datasetType, {}, "empty_run")
        prov3 = DatasetProvenance()
        prov_dict = prov3.to_flat_dict(empty_ref)
        expected = {
            "id": empty_ref.id,
            "datasettype": "empty",
            "run": "empty_run",
            "n_inputs": 0,
        }
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        prov_dict = prov3.to_flat_dict(empty_ref, prefix="x-yz", sep="-")
        expected = {
            "x-yz-id": empty_ref.id,
            "x-yz-datasettype": "empty",
            "x-yz-run": "empty_run",
            "x-yz-n_inputs": 0,
        }
        self.assertEqual(prov_dict, expected)
        DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)
        self.assertEqual(prov_dict, {})

        with self.assertRaises(ValueError):
            prov3.to_flat_dict(empty_ref, sep="##")
        with self.assertRaises(ValueError):
            prov3.to_flat_dict(empty_ref, sep="a")
        with self.assertRaises(ValueError):
            prov3.to_flat_dict(empty_ref, sep="1")
        with self.assertRaises(ValueError):
            prov3.to_flat_dict(empty_ref, sep="_")
        with self.assertRaises(ValueError):
            prov3.to_flat_dict(empty_ref, sep="Σ")

        # Dictionary with inconsistent prefixes and separators.
        test_dicts = (
            {
                "xyz-dataid.instrument": "LATISS",
            },
            {
                "xyz-dataid-detector": 10,
                "abc-dataid-instrument": "LATISS",
            },
            {
                "abc.input.0.id": "id",
                "xyz.input.0.run": "run",
                "abc.dataid.instrument": "latiss",
            },
            {
                "abc.input.0.id": "id0",
                "abc input 0 id": "id1",
            },
        )
        for prov_dict in test_dicts:
            with self.assertRaises(ValueError):
                DatasetProvenance.strip_provenance_from_flat_dict(prov_dict)


class ZipIndexTestCase(unittest.TestCase):
    """Test that a ZipIndex can be read."""

    def test_v1(self):
        """Read a v1 serialization."""
        path = os.path.join(TESTDIR, "data", "zip_index.json")
        with open(path) as fd:
            index = ZipIndex.model_validate_json(fd.read())

        self.assertEqual(index.index_version, "V1")
        self.assertEqual(len(index), 17)
        self.assertEqual(len(index.refs), 4)

        # Reconstruct the refs using the required universe.
        universe_version = index.refs.universe_version
        namespace = index.refs.universe_namespace
        universe_path = ResourcePath(
            f"resource://lsst.daf.butler/configs/old_dimensions/{namespace}_universe{universe_version}.yaml"
        )
        dimension_config = DimensionConfig(universe_path)
        universe = DimensionUniverse(dimension_config)
        refs = index.refs.to_refs(universe=universe)
        self.assertEqual(len(refs), 4)
        self.assertFalse(refs[0].dataId.hasRecords())

        # Read an index with records attached.
        path = os.path.join(TESTDIR, "data", "zip_index_with_records.json")
        with open(path) as fd:
            index = ZipIndex.model_validate_json(fd.read())
        refs = index.refs.to_refs(universe=universe)
        self.assertEqual(len(refs), 2)
        self.assertTrue(refs[0].dataId.hasRecords())


if __name__ == "__main__":
    unittest.main()