# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
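

# A minimal illustrative sketch (an invented helper, not part of the original
# test suite) of the freeze/unfreeze contract described in the docstring
# above and exercised by DataIdForTestTestCase at the bottom of this file.
def _dataIdForTestExample():
    dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
    frozen_hash = hash(dataId)  # Hash is stable while the dataId is frozen.
    dataId.frozen = False  # Unfreeze to permit mutation.
    dataId["visit"] = 53  # Allowed now, but the hash changes.
    assert hash(dataId) != frozen_hash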


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
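

# Another minimal sketch (again an invented helper, not used by the tests):
# FakeDataCoordinate builds a frozen, hashable stand-in for a real
# DataCoordinate, which is exactly what prepDeleteTest below relies on.
def _fakeDataCoordinateExample():
    dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
    lookup = {dataId: "some-dataset"}  # Hashable, so usable as a dict key.
    assert lookup[dataId] == "some-dataset"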


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that might
    otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()
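
    # For reference, the ``cls`` key read above comes from the datastore
    # section of the butler configuration; a hypothetical sketch of the
    # relevant piece (the real files live under tests/config/basic/):
    #
    #     datastore:
    #       cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
    #       root: <butlerRoot>
    #
    # ``doImport`` resolves that dotted path to the datastore type without
    # hard-coding a constructor name in the tests.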

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that we are now trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make
        # os.path.exists return False but then the new symlink will fail
        # with FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(
                                FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode
                            )
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the
                            # moment, and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # No default setting.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
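
    # For example, ``self._expiration_config("files", 2)`` renders to the
    # following cache configuration (shown here as a sketch for reference;
    # it is exactly what the f-string above produces):
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true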

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
        self.assertEqual(
            repr(uris),
            f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
            "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # Verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertTrue("instrument" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertTrue("visit" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertTrue("physical_filter" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


if __name__ == "__main__":
    unittest.main()