# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock  # Needed for unittest.mock.patch.object below.
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used for a DataId dict.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
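
# A minimal usage sketch for DataIdForTest (illustrative values only):
# instances hash by the string form of their underlying dict, and mutation
# must be enabled explicitly by unfreezing, which also changes the hash.
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     h = hash(dataId)       # stable while frozen
#     dataId.frozen = False  # opt in to mutation
#     dataId["visit"] = 53   # now allowed; hash(dataId) != h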

def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
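
# Note: the three positional arguments above appear to correspond to the
# summary, output and data attributes of MetricsExample, which the tests
# below read back via metrics.summary, metrics.output and
# getattr(metrics, comp).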

@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
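
# Illustrative sketch: from_dict wraps each plain string key in the frozen
# Named dataclass (so keys carry the .name attribute that NamedKeyDict
# expects) and then freezes the mapping:
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(dataId)  # works, via the frozenset-based __hash__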

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
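
    # Note: the starred return above lets callers unpack either a single ref
    # (datastore, ref = self.prepDeleteTest()) or several at once
    # (datastore, *refs = self.prepDeleteTest(n_refs=10)), as testRemove,
    # testForget and TrashDatastoreTestCase.testTrash do.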

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
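        # (for/else: the else clause runs only when the loop finds no
        # datastore name other than in-memory ones, i.e. it never breaks.)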
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the moment
                            # and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # affirm that DatasetRefURIs does not support MutableSequence functions
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # but DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
        self.assertEqual(
            repr(uris),
            f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
            "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertTrue("instrument" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertTrue("visit" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertTrue("physical_filter" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


if __name__ == "__main__":
    unittest.main()