Coverage for tests/test_datastore.py: 18%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import tempfile
import time
import unittest
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
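
# A usage sketch for reference. The tests below rely on the three
# constructor arguments being exposed as .summary, .output and .data
# (that attribute mapping is an assumption based on how the tests use
# MetricsExample):
#
#     metrics = makeExampleMetrics()
#     metrics.data  # -> [563, 234, 456.7, 105, 2054, -1045]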


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
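
# Example, as used by prepDeleteTest() below:
#
#     dataId = FakeDataCoordinate.from_dict(
#         {"instrument": "dummy", "visit": 638, "physical_filter": "U"}
#     )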


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
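
# Note for subclasses: every concrete test case below supplies a
# ``configFile`` class attribute; subclasses of DatastoreTests additionally
# define ``uriScheme``, ``ingestTransferModes``, ``isEphemeral``,
# ``rootKeys``, ``validationCanFail`` and (where relevant)
# ``canIngestNoTransferAuto`` to parameterize the shared tests.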


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print(f"Using storageClass: {sc.name}")
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if the datastore does not support trust mode.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore has no trustGetRequest attribute")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            self.skipTest("In-memory datastore does not support disassembly")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of test failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
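
    # The returned tuple unpacks directly at the call site, e.g.
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)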

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This makes os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but can't do an in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    the mode is auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but
        # leaves a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
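
        # Each tuple is (datasetTypeName, storageClass, whether this
        # datastore's constraints should accept a put of that combination).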
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric2", sc1, False),
            ("metric33", sc1, True),
            ("metric2", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore,
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
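
        # Each ``accept`` tuple lists, in order, whether each child datastore
        # of the chain should end up holding the dataset; ``ingest`` is
        # whether the chain as a whole should accept the file on ingest.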
        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should not be in the cache any more.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


if __name__ == "__main__":
    unittest.main()