# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import os
import pickle
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
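
# A minimal usage sketch (illustrative only, not part of the test suite):
# the instance hashes like a frozen dict and rejects mutation unless thawed.
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)          # works, unlike a plain dict
#     data_id["visit"] = 53  # raises RuntimeError: frozen
#     data_id.frozen = False
#     data_id["visit"] = 53  # now allowed; note that the hash changes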


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
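
# Note for readers: the positional arguments above appear to map to the
# MetricsExample fields exercised throughout these tests -- summary, output,
# and data, in that order (the keyword form
# ``MetricsExample(summary=..., data=..., output=...)`` appears further down).
# This ordering is inferred from usage in this file, not from the
# MetricsExample definition itself.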


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
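
# Illustrative sketch: from_dict wraps each key in Named so the NamedKeyDict
# key requirements are met, then freezes the result so it can stand in for a
# real DataCoordinate in dict keys and sets.
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(data_id)  # hashable thanks to __hash__ above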


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")
                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of test failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = tuple(["a", "b", 1])
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        """Put the same data under two storage class variants and check that
        both reads return equal results."""
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)
1518 def testEnvvarCacheDir(self):
1519 config_str = f"""
1520cached:
1521 root: '{self.root}'
1522 cacheable:
1523 metric0: true
1524 """
1526 root = ResourcePath(self.root, forceDirectory=True)
1527 env_dir = root.join("somewhere", forceDirectory=True)
1528 elsewhere = root.join("elsewhere", forceDirectory=True)
1530 # Environment variable should override the config value.
1531 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
1532 cache_manager = self._make_cache_manager(config_str)
1533 self.assertEqual(cache_manager.cache_directory, env_dir)
1535 # This environment variable should not override the config value.
1536 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
1537 cache_manager = self._make_cache_manager(config_str)
1538 self.assertEqual(cache_manager.cache_directory, root)
1540 # No default setting.
1541 config_str = """
1542cached:
1543 root: null
1544 default: true
1545 cacheable:
1546 metric1: false
1547 """
1548 cache_manager = self._make_cache_manager(config_str)
1550 # This environment variable should override the config value.
1551 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
1552 cache_manager = self._make_cache_manager(config_str)
1553 self.assertEqual(cache_manager.cache_directory, env_dir)
1555 # If both environment variables are set the main (not IF_UNSET)
1556 # variable should win.
1557 with unittest.mock.patch.dict(
1558 os.environ,
1559 {
1560 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
1561 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
1562 },
1563 ):
1564 cache_manager = self._make_cache_manager(config_str)
1565 self.assertEqual(cache_manager.cache_directory, env_dir)
1567 # Use the API to set the environment variable, making sure that the
1568 # variable is reset on exit.
1569 with unittest.mock.patch.dict(
1570 os.environ,
1571 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
1572 ):
1573 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
1574 self.assertTrue(defined)
1575 cache_manager = self._make_cache_manager(config_str)
1576 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1578 # Now create the cache manager ahead of time and set the fallback
1579 # later.
1580 cache_manager = self._make_cache_manager(config_str)
1581 self.assertIsNone(cache_manager._cache_directory)
1582 with unittest.mock.patch.dict(
1583 os.environ,
1584 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
1585 ):
1586 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
1587 self.assertTrue(defined)
1588 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
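# Taken together, the assertions above pin down the precedence for
# choosing a cache directory, highest first:
#
#   1. the DAF_BUTLER_CACHE_DIRECTORY environment variable
#   2. an explicit root in the cache configuration
#   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET (or the
#      set_fallback_cache_directory_if_unset() API)
#   4. a temporary directory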
1590 def testExplicitCacheDir(self):
1591 config_str = f"""
1592cached:
1593 root: '{self.root}'
1594 cacheable:
1595 metric0: true
1596 """
1597 cache_manager = self._make_cache_manager(config_str)
1599 # Look inside to check we do have a cache directory.
1600 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))
1602 self.assertCache(cache_manager)
1604 # Test that the cache directory is not marked temporary
1605 self.assertFalse(cache_manager.cache_directory.isTemporary)
1607 def assertCache(self, cache_manager):
1608 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1609 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1611 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1612 self.assertIsInstance(uri, ResourcePath)
1613 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1615 # Check presence in cache using ref and then using file extension.
1616 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1617 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1618 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1619 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1621 # Cached file should no longer exist but uncached file should be
1622 # unaffected.
1623 self.assertFalse(self.files[0].exists())
1624 self.assertTrue(self.files[1].exists())
1626 # Should find this file and it should be within the cache directory.
1627 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1628 self.assertTrue(found.exists())
1629 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1631 # Should not be able to find these in cache
1632 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1633 self.assertIsNone(found)
1634 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1635 self.assertIsNone(found)
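# In outline, the cache round trip exercised by this helper is (a sketch
# using stand-ins ``local_file``/``ref`` for the fixtures above):
#
#     uri = cache_manager.move_to_cache(local_file, ref)  # moves, not copies
#     with cache_manager.find_in_cache(ref, ".txt") as found:
#         data = found.read()  # guarded against expiry inside the context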
1637 def testNoCache(self):
1638 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1639 for uri, ref in zip(self.files, self.refs):
1640 self.assertFalse(cache_manager.should_be_cached(ref))
1641 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1642 self.assertFalse(cache_manager.known_to_cache(ref))
1643 with cache_manager.find_in_cache(ref, ".txt") as found:
1644 self.assertIsNone(found, msg=f"{cache_manager}")
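# The disabled manager is thus a drop-in no-op: should_be_cached and
# known_to_cache are always False, and move_to_cache and find_in_cache
# always yield None.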
1646 def _expiration_config(self, mode: str, threshold: int) -> str:
1647 return f"""
1648cached:
1649 default: true
1650 expiry:
1651 mode: {mode}
1652 threshold: {threshold}
1653 cacheable:
1654 unused: true
1655 """
1657 def testCacheExpiryFiles(self):
1658 threshold = 2 # Keep at least 2 files.
1659 mode = "files"
1660 config_str = self._expiration_config(mode, threshold)
1662 cache_manager = self._make_cache_manager(config_str)
1664 # Check that an empty cache reports any arbitrary ref as unknown.
1665 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1667 # Should end with datasets: 2, 3, 4
1668 self.assertExpiration(cache_manager, 5, threshold + 1)
1669 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1671 # Check that we will not expire a file that is actively in use.
1672 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1673 self.assertIsNotNone(found)
1675 # Trigger cache expiration that should remove the file
1676 # we just retrieved. Should now have: 3, 4, 5
1677 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1678 self.assertIsNotNone(cached)
1680 # Cache should still report the expected file count.
1681 self.assertEqual(cache_manager.file_count, threshold + 1)
1683 # Add additional entry to cache.
1684 # Should now have 4, 5, 6
1685 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1686 self.assertIsNotNone(cached)
1688 # Is the file still there?
1689 self.assertTrue(found.exists())
1691 # Can we read it?
1692 data = found.read()
1693 self.assertGreater(len(data), 0)
1695 # Outside context the file should no longer exist.
1696 self.assertFalse(found.exists())
1698 # File count should not have changed.
1699 self.assertEqual(cache_manager.file_count, threshold + 1)
1701 # Dataset 2 was in the exempt directory but because hardlinks
1702 # are used it was deleted from the main cache during cache expiry
1703 # above and so should no longer be found.
1704 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1705 self.assertIsNone(found)
1707 # And the one stored after it is also gone.
1708 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1709 self.assertIsNone(found)
1711 # But dataset 4 is present.
1712 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1713 self.assertIsNotNone(found)
1715 # Adding a new dataset to the cache should now delete it.
1716 cache_manager.move_to_cache(self.files[7], self.refs[7])
1718 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1719 self.assertIsNone(found)
1721 def testCacheExpiryDatasets(self):
1722 threshold = 2 # Keep 2 datasets.
1723 mode = "datasets"
1724 config_str = self._expiration_config(mode, threshold)
1726 cache_manager = self._make_cache_manager(config_str)
1727 self.assertExpiration(cache_manager, 5, threshold + 1)
1728 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1730 def testCacheExpiryDatasetsComposite(self):
1731 threshold = 2 # Keep 2 datasets.
1732 mode = "datasets"
1733 config_str = self._expiration_config(mode, threshold)
1735 cache_manager = self._make_cache_manager(config_str)
1737 n_datasets = 3
1738 for i in range(n_datasets):
1739 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1740 cached = cache_manager.move_to_cache(component_file, component_ref)
1741 self.assertIsNotNone(cached)
1742 self.assertTrue(cache_manager.known_to_cache(component_ref))
1743 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1744 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1746 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1748 # Write two new non-composite datasets and the number of files should drop.
1749 self.assertExpiration(cache_manager, 2, 5)
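# In other words, in ``datasets`` mode a three-file composite counts as a
# single dataset for expiry: the 5 surviving files are one composite (3
# component files) plus the two new single-file datasets.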
1751 def testCacheExpirySize(self):
1752 threshold = 55 # Each file is 10 bytes
1753 mode = "size"
1754 config_str = self._expiration_config(mode, threshold)
1756 cache_manager = self._make_cache_manager(config_str)
1757 self.assertExpiration(cache_manager, 10, 6)
1758 self.assertIn(f"{mode}={threshold}", str(cache_manager))
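# With 10-byte files and a 55-byte threshold the cache settles at 6 files
# (60 bytes); the retained total can slightly exceed the threshold,
# presumably because the newest file is moved in after expiry has trimmed
# the older entries.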
1760 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1761 """Insert the datasets and then check the number retained."""
1762 for i in range(n_datasets):
1763 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1764 self.assertIsNotNone(cached)
1766 self.assertEqual(cache_manager.file_count, n_retained)
1768 # The oldest files should no longer be in the cache.
1769 for i in range(n_datasets):
1770 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1771 if i >= n_datasets - n_retained:
1772 self.assertIsInstance(found, ResourcePath)
1773 else:
1774 self.assertIsNone(found)
1776 def testCacheExpiryAge(self):
1777 threshold = 1 # Expire older than 2 seconds
1778 mode = "age"
1779 config_str = self._expiration_config(mode, threshold)
1781 cache_manager = self._make_cache_manager(config_str)
1782 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1784 # Insert 2 files, then sleep, then insert 4 more.
1785 for i in range(2):
1786 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1787 self.assertIsNotNone(cached)
1788 time.sleep(2.0)
1789 for j in range(4):
1790 i = 2 + j # Continue the counting
1791 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1792 self.assertIsNotNone(cached)
1794 # Only the files written after the sleep should exist.
1795 self.assertEqual(cache_manager.file_count, 4)
1796 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1797 self.assertIsNone(found)
1798 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1799 self.assertIsInstance(found, ResourcePath)
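# Note that this test is timing-sensitive: the 2-second sleep must
# comfortably exceed the 1-second age threshold for the first two files
# to be considered expired.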
1802class DatasetRefURIsTestCase(unittest.TestCase):
1803 """Tests for DatasetRefURIs."""
1805 def testSequenceAccess(self):
1806 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1807 uris = DatasetRefURIs()
1809 self.assertEqual(len(uris), 2)
1810 self.assertIsNone(uris[0])
1811 self.assertEqual(uris[1], {})
1813 primaryURI = ResourcePath("1/2/3")
1814 componentURI = ResourcePath("a/b/c")
1817 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1817 with self.assertRaises(TypeError):
1818 uris[0] = primaryURI
1819 with self.assertRaises(TypeError):
1820 uris[1] = {"foo": componentURI}
1822 # but DatasetRefURIs can be set by property name:
1823 uris.primaryURI = primaryURI
1824 uris.componentURIs = {"foo": componentURI}
1825 self.assertEqual(uris.primaryURI, primaryURI)
1826 self.assertEqual(uris[0], primaryURI)
1828 primary, components = uris
1829 self.assertEqual(primary, primaryURI)
1830 self.assertEqual(components, {"foo": componentURI})
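# The tuple behaviour verified above is what lets call sites unpack the
# return value directly, e.g. (``get_uris`` is a hypothetical API that
# returns a DatasetRefURIs):
#
#     primary, components = get_uris(ref)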
1832 def testRepr(self):
1833 """Verify __repr__ output."""
1834 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
1835 self.assertEqual(
1836 repr(uris),
1837 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
1838 )
1841class DataIdForTestTestCase(unittest.TestCase):
1842 """Tests for the DataIdForTest class."""
1844 def testImmutable(self):
1845 """Verify that an instance is immutable by default."""
1846 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1847 initial_hash = hash(dataId)
1849 with self.assertRaises(RuntimeError):
1850 dataId["instrument"] = "foo"
1852 with self.assertRaises(RuntimeError):
1853 del dataId["instrument"]
1855 assert sys.version_info[0] == 3
1856 if sys.version_info[1] >= 9:
1857 with self.assertRaises(RuntimeError):
1858 dataId |= dict(foo="bar")
1860 with self.assertRaises(RuntimeError):
1861 dataId.pop("instrument")
1863 with self.assertRaises(RuntimeError):
1864 dataId.popitem()
1866 with self.assertRaises(RuntimeError):
1867 dataId.update(dict(instrument="foo"))
1869 # Verify that the hash value has not changed.
1870 self.assertEqual(initial_hash, hash(dataId))
1872 def testMutable(self):
1873 """Verify that an instance can be made mutable (unfrozen)."""
1874 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1875 initial_hash = hash(dataId)
1876 dataId.frozen = False
1877 self.assertEqual(initial_hash, hash(dataId))
1879 dataId["instrument"] = "foo"
1880 self.assertEqual(dataId["instrument"], "foo")
1881 self.assertNotEqual(initial_hash, hash(dataId))
1882 initial_hash = hash(dataId)
1884 del dataId["instrument"]
1885 self.assertNotIn("instrument", dataId)
1886 self.assertNotEqual(initial_hash, hash(dataId))
1887 initial_hash = hash(dataId)
1889 assert sys.version_info[0] == 3
1890 if sys.version_info[1] >= 9:
1891 dataId |= dict(foo="bar")
1892 self.assertEqual(dataId["foo"], "bar")
1893 self.assertNotEqual(initial_hash, hash(dataId))
1894 initial_hash = hash(dataId)
1896 dataId.pop("visit")
1897 self.assertNotIn("visit", dataId)
1898 self.assertNotEqual(initial_hash, hash(dataId))
1899 initial_hash = hash(dataId)
1901 dataId.popitem()
1902 self.assertNotIn("physical_filter", dataId)
1903 self.assertNotEqual(initial_hash, hash(dataId))
1904 initial_hash = hash(dataId)
1906 dataId.update(dict(instrument="foo"))
1907 self.assertEqual(dataId["instrument"], "foo")
1908 self.assertNotEqual(initial_hash, hash(dataId))
1909 initial_hash = hash(dataId)
1912class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
1913 storageClassFactory = StorageClassFactory()
1915 def test_StoredFileInfo(self):
1916 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1917 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)
1919 record = dict(
1920 storage_class="StructuredDataDict",
1921 formatter="lsst.daf.butler.Formatter",
1922 path="a/b/c.txt",
1923 component="component",
1924 dataset_id=ref.id,
1925 checksum=None,
1926 file_size=5,
1927 )
1928 info = StoredFileInfo.from_record(record)
1930 self.assertEqual(info.dataset_id, ref.id)
1931 self.assertEqual(info.to_record(), record)
1933 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)
1934 rebased = info.rebase(ref2)
1935 self.assertEqual(rebased.dataset_id, ref2.id)
1936 self.assertEqual(rebased.rebase(ref), info)
1938 with self.assertRaises(TypeError):
1939 rebased.update(formatter=42)
1941 with self.assertRaises(ValueError):
1942 rebased.update(something=42, new="42")
1944 # Check that pickle works on StoredFileInfo.
1945 pickled_info = pickle.dumps(info)
1946 unpickled_info = pickle.loads(pickled_info)
1947 self.assertEqual(unpickled_info, info)
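# The assertions above establish that ``from_record`` and ``to_record``
# are inverses for a well-formed record, and that ``rebase`` re-targets
# the record at a new dataset_id while remaining reversible:
#
#     info = StoredFileInfo.from_record(record)
#     assert info.to_record() == record
#     assert info.rebase(ref2).rebase(ref) == info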
1950if __name__ == "__main__":
1951 unittest.main()