# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
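
"""Tests for the daf_butler Datastore implementations and cache managers."""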

from __future__ import annotations

import os
import pickle
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        # Hash the string form of the underlying dict so that equal
        # contents (in the same insertion order) hash equally.
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
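

# Illustrative usage sketch for DataIdForTest (not itself executed here):
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)           # usable wherever a hashable DataId is expected
#     data_id["visit"] = 53   # raises RuntimeError: DataIdForTest is frozen.
#     data_id.frozen = False  # thaw explicitly to permit mutation...
#     data_id["visit"] = 53   # ...although the hash value will now change.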


def makeExampleMetrics(use_none=False):
    """Return an example MetricsExample, optionally with a `None` array."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    """Simple frozen dataclass providing the `name` attribute expected of
    NamedKeyDict keys."""

    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
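

# Illustrative note: FakeDataCoordinate.from_dict wraps each plain key in
# Named so that the keys satisfy the named-key expectation of NamedKeyDict,
# and freeze() then makes the mapping immutable and safe to hash, e.g.
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})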


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Using getManyURIs without prediction before the dataset has
            # been put should raise.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Using getManyURIs with prediction before the dataset has been
            # put should predict the URIs.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start the datastore in the default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry, so that only trust mode could
            # find the dataset.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round so that
                # a failure in this subtest does not trigger a cascade of
                # failures caused by file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read-only
                # component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        """Put ``n_refs`` datasets and return the datastore and the refs."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make
        # os.path.exists return False, but then the new symlink will fail
        # with FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of the datastore root
                    unless the mode is "auto"."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test;
        # a ChainedDatastore that contains only InMemoryDatastores has to
        # be skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test the export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative paths.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to the FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that a datastore cleans up partial files after failed writes."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey-patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has
        the same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if the child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the
                            # moment, and that does not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no cache root set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for an arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4.
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5.
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have: 4, 5, 6.
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now expire dataset 4.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))
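
    # Composite datasets: caching every component individually should still
    # count as a single dataset for "datasets"-mode expiry, so two composites
    # of three components each fit within a threshold of two datasets.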

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files.

        # Write two new non-composite datasets and the number of cached
        # files should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, sleep past the age threshold, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should remain.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertIsNone(uris[0])
        self.assertEqual(uris[1], {})
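
        # Indexing and len() here mimic a two-item tuple; presumably this
        # sequence behaviour is kept for compatibility with older APIs that
        # returned a plain (primaryURI, componentURIs) tuple.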

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # item assignment.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # Verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertNotIn("instrument", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertNotIn("visit", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertNotIn("physical_filter", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)
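
        # update() is expected to validate its arguments: a wrongly typed
        # value raises TypeError and an unknown field name raises ValueError.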
        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()