Coverage for tests/test_datastore.py: 12% (953 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock  # Not implicitly imported by ``import unittest``; needed for the mock patch below.
from collections import UserDict
from dataclasses import dataclass

import lsst.daf.butler.datastores.fileDatastore  # Imported so the mock patch below can resolve this module path.
import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
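
    # A minimal usage sketch (illustrative only; DataIdForTestTestCase below
    # exercises this behaviour for real): contents are fixed at construction
    # time, and mutation requires explicitly unfreezing first.
    #
    #     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
    #     hash(dataId)          # stable while frozen
    #     dataId.frozen = False
    #     dataId["visit"] = 53  # allowed once unfrozen, but the hash changes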


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
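

# Illustrative sketch of how FakeDataCoordinate is used (see prepDeleteTest
# below): from_dict wraps each plain string key in a frozen Named dataclass so
# the frozen mapping is hashable and can stand in for a DataCoordinate.
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     lookup = {dataId: "usable as a dict key"}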


class TransactionTestError(Exception):
    """Specific exception for transaction tests, to avoid the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
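
    # Concrete subclasses drive the shared tests through class-level
    # attributes; a representative configuration (taken from
    # PosixDatastoreTestCase below) looks like:
    #
    #     configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    #     uriScheme = "file"
    #     ingestTransferModes = (None, "copy", "move", "link", "hardlink",
    #                            "symlink", "relsymlink", "auto")
    #     isEphemeral = False
    #     rootKeys = ("root",)
    #     validationCanFail = True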


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration, which uses
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry, so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink-to-a-symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back, but with checksums enabled
        # explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores currently means InMemory,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
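
    # For reference, _expiration_config("files", 2) renders to the YAML below
    # (the mode/threshold pair used by testCacheExpiryFiles):
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true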

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
        self.assertEqual(
            repr(uris),
            f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
            "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # Verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertTrue("instrument" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertTrue("visit" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertTrue("physical_filter" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


if __name__ == "__main__":
    unittest.main()