# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import unittest
import shutil
import yaml
import tempfile
import time
from dataclasses import dataclass

import lsst.utils.tests

from lsst.utils import doImport

from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler import (DatastoreCacheManager, DatastoreDisabledCacheManager,
                             DatastoreCacheManagerConfig, Config, ButlerURI, NamedKeyDict)

from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
                                   BadNoWriteFormatter, MetricsExample, DummyRegistry)

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
    """Return a simple MetricsExample for the tests to store and retrieve."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )
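
# For orientation, a sketch of how the tests below consume this helper
# (assuming the positional constructor arguments map to the ``summary``,
# ``output`` and ``data`` attributes, which is how the component tests
# access the object):
#
#     metrics = makeExampleMetrics()
#     metrics.data     # the numeric list, or None when use_none=True
#     metrics.summary  # {"AM1": 5.2, "AM2": 30.6}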


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
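
# A minimal usage sketch (illustrative only): the class just needs to be
# frozen and hashable so it can stand in for a real DataCoordinate wherever
# a data ID is used as a dictionary key (see prepDeleteTest below).
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 52})
#     seen = {dataId: "info"}  # hashable, so usable as a dict key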


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
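
# Concrete test cases are expected to define the class attributes that the
# shared tests consult; as one example, the values used by
# PosixDatastoreTestCase further down are:
#
#     configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
#     uriScheme = "file"
#     canIngestNoTransferAuto = True
#     ingestTransferModes = (None, "copy", "move", "link", "hardlink",
#                            "symlink", "relsymlink", "auto")
#     isEphemeral = False
#     rootKeys = ("root",)
#     validationCanFail = True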


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print(f"Using storageClass: {sc.name}")
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about.
        """

        datastore = self.makeDatastore()

        # Skip the test if this datastore does not support trust mode
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore does not have a trustGetRequest attribute")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble
            self.skipTest("In-memory datastore does not disassemble")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData",
                                               "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638 + i,
                                                   "physical_filter": "U"})
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
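
    # Note on the starred return above: callers unpack a single ref as
    # ``datastore, ref = self.prepDeleteTest()`` and multiple refs as
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)``, as the trash
    # tests below do.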

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make
        # os.path.exists() return False, but the new symlink will then fail
        # with FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref),
                                             transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but
        # leaves a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False),
                                                      False),
                                                     ("metric2", dataId2, sc1, (True, False, False),
                                                      False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False),
                                                      True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral currently means InMemory, which
                            # does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId,
                                         conform=False) for n in range(n_datasets)]

        root_uri = ButlerURI(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId,
                                                   conform=False) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory,
                         ButlerURI(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ButlerURI)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for i in range(2, 6):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ButlerURI)


if __name__ == "__main__":
    unittest.main()