# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import tempfile
import time
import unittest
from dataclasses import dataclass
from itertools import chain

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
    """Return a simple MetricsExample, optionally with a None data array."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
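
# Note for readers: the three positional arguments above correspond to the
# "summary", "output" and "data" components that the composite tests below
# read back (see testBasicPutGet and testDisassembly).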


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
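
# A minimal usage sketch (hypothetical values): FakeDataCoordinate.from_dict(
# {"instrument": "dummy", "visit": 52}) returns a frozen mapping keyed by
# Named("instrument") and Named("visit") that, thanks to __hash__ above, can
# be used as a dict key or set member like a real DataCoordinate.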


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print(f"Using storageClass: {sc.name}")
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

            # Save the storage class for use after the loop (sc itself is
            # reassigned below).
            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if the datastore does not support trust mode
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("datastore does not support trustGetRequest")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that only trust mode can find
            # the dataset.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, access should now break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            self.skipTest("in-memory datastore does not disassemble")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # Each storage class can generate an identical file for a
                # component, so remove it here to avoid clashes on the next
                # iteration
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        """Put ``n_refs`` datasets and return the datastore and the refs."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Forget does not delete the file, so the predicted URI should still
        # point to it.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but cannot do an in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first copy it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Test ingesting a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file before the next loop iteration since
                    # it will get the same file name in the store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only a FileDatastore can be used for this test.
        # ChainedDatastores that contain only InMemoryDatastores must be
        # skipped as well.
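        # Note on the for/else idiom below: the else branch runs only when
        # the loop finishes without hitting break, i.e. when every child
        # datastore name starts with "InMemoryDatastore".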
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.refs), n_refs)
                self.assertEqual(len(list(chain(*record_data.records.values()))), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric2", sc1, False),
            ("metric33", sc1, True),
            ("metric2", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
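
        # In each row below, the accept tuple holds one expected-presence
        # flag per child datastore, in the order the children appear in
        # datastore.datastores (see the zip() checks further down).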
        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral currently means InMemoryDatastore,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


if __name__ == "__main__":
    unittest.main()