# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class usable as a DataId in tests.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before Python 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
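

# A minimal usage sketch of DataIdForTest (illustrative only, not executed
# by the test suite): the hash derives from str(self.data), so an instance
# must be unfrozen before mutation, and mutation changes the hash.
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     h = hash(dataId)       # allowed: instances are hashable
#     dataId.frozen = False  # unfreeze to permit mutation
#     dataId["visit"] = 53   # now allowed; hash(dataId) != h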


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
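

# Note (a reading of this helper, not part of the original file): in the
# tests below the three positional arguments are exercised as the
# "summary", "output" and "data" components that the component-retrieval
# tests read back via makeComponentRef().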


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
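

# A minimal sketch of how FakeDataCoordinate is used below (illustrative
# only): from_dict wraps each key in Named and freezes the result, giving a
# hashable stand-in for a real DataCoordinate.
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(dataId)  # usable as a dict key or anywhere hashability is required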


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that
    might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
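

# Concrete subclasses of DatastoreTestsBase define class attributes that
# parameterize the shared tests; the names below are the ones used by the
# subclasses in this file (see, e.g., PosixDatastoreTestCase):
#
#     configFile              - path to the datastore configuration YAML
#     uriScheme               - expected URI scheme ("file", "mem", ...)
#     ingestTransferModes     - transfer modes supported for ingest
#     canIngestNoTransferAuto - whether "auto" can ingest in place
#     isEphemeral             - whether the datastore is in-memory
#     rootKeys                - config keys that should contain the root
#     validationCanFail       - whether validateConfiguration can raise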


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without prediction before
            # the dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with prediction before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip the test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            disassembled = sc_name == "StructuredComposite"

            # Start the datastore in its default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s).
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so now we are trusting.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round so that
                # a test failure in this subtest does not trigger a
                # cascade of failures caused by file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes, attempt to access a
                # read-only component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Test the case where the registry disassembles and puts to the
        datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class,
                # so remove the component here.
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite.
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existent file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make
        # os.path.exists return False, but the new symlink will then fail
        # with FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but cannot do an in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of the datastore root
                    unless the mode is auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for auto mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test the export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only a FileDatastore can be used for this test; a
        # ChainedDatastore that only includes InMemoryDatastores has to
        # be skipped as well.
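        # (A note on the for/else idiom below: the else branch runs only
        # if the loop never breaks, i.e. every chained datastore is
        # in-memory.)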
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a
            # complete set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict the trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and
        # leaves a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has
        the same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric2", sc1, False),
            ("metric33", sc1, True),
            ("metric2", sc2, True),
        ):
            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per datastore,
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        ):

            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the
                            # moment, and that does not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for the datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create the test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use the default caching status, with metric1 set to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in the cache using the ref and then using the
        # file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # The cached file should no longer exist but the uncached file
        # should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file, and it should be within the cache
        # directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
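
    # For example (illustrative only), _expiration_config("files", 2)
    # produces configuration equivalent to:
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true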

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for an arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4.
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5.
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # The cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have: 4, 5, 6.
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # The file count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory, but because hardlinks
        # are used it was deleted from the main cache during the cache
        # expiry above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets, each of 3 files.

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
        self.assertEqual(
            repr(uris),
            f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
            "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # Verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertTrue("instrument" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertTrue("visit" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertTrue("physical_filter" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


if __name__ == "__main__":
    unittest.main()