# Coverage for tests/test_datastore.py: 12% of 1020 statements
# (coverage.py v6.5.0, created at 2022-12-08 10:01 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
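

# Illustrative sketch only (not exercised by the tests below): instances hash
# via str(self.data) and refuse mutation once constructed, so they can serve
# as dictionary keys wherever a hashable DataId is needed.
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     lookup = {dataId: "entry"}   # fine: instances are hashable
#     dataId["visit"] = 53         # raises RuntimeError("DataIdForTest is frozen.")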


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
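

# Illustrative sketch only: from_dict wraps each key in Named so that it
# exposes the .name attribute NamedKeyDict expects, then freezes the result.
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(dataId)  # hashable, so usable where tests need a frozen DataCoordinate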


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry; from now on only trust mode can
            # find the dataset.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
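
    # Note (illustrative): the starred return above means callers unpack as
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)
    # which is how testRemove, testForget and testTrash below consume it.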

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This makes os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well. (for/else: the else branch runs only if the loop
        # never breaks, i.e. every child datastore is an InMemoryDatastore.)
        for child_name in datastore.names:
            if not child_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the
                            # moment, and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now with no cache root in the config.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
1547 def testCacheExpiryFiles(self):
1548 threshold = 2 # Keep at least 2 files.
1549 mode = "files"
1550 config_str = self._expiration_config(mode, threshold)
1552 cache_manager = self._make_cache_manager(config_str)
1554 # Check that an empty cache returns unknown for arbitrary ref
1555 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1557 # Should end with datasets: 2, 3, 4
1558 self.assertExpiration(cache_manager, 5, threshold + 1)
1559 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1561 # Check that we will not expire a file that is actively in use.
1562 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1563 self.assertIsNotNone(found)
1565 # Trigger cache expiration that should remove the file
1566 # we just retrieved. Should now have: 3, 4, 5
1567 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1568 self.assertIsNotNone(cached)
1570 # Cache should still report the standard file count.
1571 self.assertEqual(cache_manager.file_count, threshold + 1)
1573 # Add additional entry to cache.
1574 # Should now have 4, 5, 6
1575 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1576 self.assertIsNotNone(cached)
1578 # Is the file still there?
1579 self.assertTrue(found.exists())
1581 # Can we read it?
1582 data = found.read()
1583 self.assertGreater(len(data), 0)
1585 # Outside context the file should no longer exist.
1586 self.assertFalse(found.exists())
1588 # File count should not have changed.
1589 self.assertEqual(cache_manager.file_count, threshold + 1)
1591 # Dataset 2 was moved to the exempt directory, but because hardlinks
1592 # are used it was also deleted from the main cache during the expiry
1593 # above, so it should no longer be found.
1594 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1595 self.assertIsNone(found)
1597 # And the one stored after it is also gone.
1598 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1599 self.assertIsNone(found)
1601 # But dataset 4 is present.
1602 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1603 self.assertIsNotNone(found)
1605 # Adding a new dataset to the cache should leave dataset 2 expired.
1606 cache_manager.move_to_cache(self.files[7], self.refs[7])
1608 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1609 self.assertIsNone(found)
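# A compact replay of the evictions above (dataset indices, oldest
# first), assuming expiry trims the existing entries to `threshold`
# before each new file is registered, which matches the threshold + 1
# counts asserted in this test:
#
#     insert 0..4 -> cache holds [2, 3, 4]
#     insert 5    -> cache holds [3, 4, 5]  (2 kept only in exempt dir)
#     insert 6    -> cache holds [4, 5, 6]
#     insert 7    -> cache holds [5, 6, 7]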
1611 def testCacheExpiryDatasets(self):
1612 threshold = 2 # Keep 2 datasets.
1613 mode = "datasets"
1614 config_str = self._expiration_config(mode, threshold)
1616 cache_manager = self._make_cache_manager(config_str)
1617 self.assertExpiration(cache_manager, 5, threshold + 1)
1618 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1620 def testCacheExpiryDatasetsComposite(self):
1621 threshold = 2 # Keep 2 datasets.
1622 mode = "datasets"
1623 config_str = self._expiration_config(mode, threshold)
1625 cache_manager = self._make_cache_manager(config_str)
1627 n_datasets = 3
1628 for i in range(n_datasets):
1629 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1630 cached = cache_manager.move_to_cache(component_file, component_ref)
1631 self.assertIsNotNone(cached)
1632 self.assertTrue(cache_manager.known_to_cache(component_ref))
1633 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1634 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1636 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
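# The same trim-before-add reading explains the count: as the third
# dataset's components arrive, three distinct dataset IDs exceed the
# threshold of two, the oldest composite (3 files) is evicted, and the
# two newest composites remain fully cached: 2 x 3 = 6 files.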
1638 # Write two new non-composite datasets; the number of cached files should drop.
1639 self.assertExpiration(cache_manager, 2, 5)
1641 def testCacheExpirySize(self):
1642 threshold = 55 # Each file is 10 bytes.
1643 mode = "size"
1644 config_str = self._expiration_config(mode, threshold)
1646 cache_manager = self._make_cache_manager(config_str)
1647 self.assertExpiration(cache_manager, 10, 6)
1648 self.assertIn(f"{mode}={threshold}", str(cache_manager))
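# Worked numbers for "size" mode, assuming the existing entries are
# trimmed to <= threshold bytes before a new file lands: with 10-byte
# files and threshold 55, floor(55 / 10) = 5 old files survive, plus
# the newly added one, matching the 6 retained files asserted above.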
1650 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1651 """Insert the datasets and then check the number retained."""
1652 for i in range(n_datasets):
1653 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1654 self.assertIsNotNone(cached)
1656 self.assertEqual(cache_manager.file_count, n_retained)
1658 # The oldest files should no longer be in the cache.
1659 for i in range(n_datasets):
1660 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1661 if i >= n_datasets - n_retained:
1662 self.assertIsInstance(found, ResourcePath)
1663 else:
1664 self.assertIsNone(found)
1666 def testCacheExpiryAge(self):
1667 threshold = 1 # Expire files older than 1 second.
1668 mode = "age"
1669 config_str = self._expiration_config(mode, threshold)
1671 cache_manager = self._make_cache_manager(config_str)
1672 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1674 # Insert 2 files, then sleep, then insert 4 more.
1675 for i in range(2):
1676 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1677 self.assertIsNotNone(cached)
1678 time.sleep(2.0)
1679 for j in range(4):
1680 i = 2 + j # Continue counting from the first batch.
1681 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1682 self.assertIsNotNone(cached)
1684 # Only the files written after the sleep should exist.
1685 self.assertEqual(cache_manager.file_count, 4)
1686 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1687 self.assertIsNone(found)
1688 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1689 self.assertIsInstance(found, ResourcePath)
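# Timing sketch: with a 1-second age threshold and a 2-second sleep,
# both pre-sleep files are already stale when the post-sleep inserts
# trigger expiry, so only the four newer files survive.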
1692class DatasetRefURIsTestCase(unittest.TestCase):
1693 """Tests for DatasetRefURIs."""
1695 def testSequenceAccess(self):
1696 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1697 uris = DatasetRefURIs()
1699 self.assertEqual(len(uris), 2)
1700 self.assertIsNone(uris[0])
1701 self.assertEqual(uris[1], {})
1703 primaryURI = ResourcePath("1/2/3")
1704 componentURI = ResourcePath("a/b/c")
1706 # Affirm that DatasetRefURIs does not support MutableSequence item assignment
1707 with self.assertRaises(TypeError):
1708 uris[0] = primaryURI
1709 with self.assertRaises(TypeError):
1710 uris[1] = {"foo": componentURI}
1712 # but DatasetRefURIs can be set by property name:
1713 uris.primaryURI = primaryURI
1714 uris.componentURIs = {"foo": componentURI}
1715 self.assertEqual(uris.primaryURI, primaryURI)
1716 self.assertEqual(uris[0], primaryURI)
1718 primary, components = uris
1719 self.assertEqual(primary, primaryURI)
1720 self.assertEqual(components, {"foo": componentURI})
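# DatasetRefURIs unpacks like the two-item tuple that URI accessors
# historically returned, so older calling code keeps working; an
# illustrative (hypothetical) consumer:
#
#     primary, components = datastore.getURIs(ref)
#     if primary is not None:
#         print(primary.geturl())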
1722 def testRepr(self):
1723 """Verify __repr__ output."""
1724 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
1725 self.assertEqual(
1726 repr(uris),
1727 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
1728 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
1729 )
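# ResourcePath resolves relative paths against the current working
# directory, which is why os.getcwd() appears in the expected repr
# above.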
1732class DataIdForTestTestCase(unittest.TestCase):
1733 """Tests for the DataIdForTest class."""
1735 def testImmutable(self):
1736 """Verify that an instance is immutable by default."""
1737 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1738 initial_hash = hash(dataId)
1740 with self.assertRaises(RuntimeError):
1741 dataId["instrument"] = "foo"
1743 with self.assertRaises(RuntimeError):
1744 del dataId["instrument"]
1746 assert sys.version_info[0] == 3
1747 if sys.version_info[1] >= 9: # Dict |= assignment needs Python 3.9 (PEP 584).
1748 with self.assertRaises(RuntimeError):
1749 dataId |= dict(foo="bar")
1751 with self.assertRaises(RuntimeError):
1752 dataId.pop("instrument")
1754 with self.assertRaises(RuntimeError):
1755 dataId.popitem()
1757 with self.assertRaises(RuntimeError):
1758 dataId.update(dict(instrument="foo"))
1760 # Verify that the hash value has not changed.
1761 self.assertEqual(initial_hash, hash(dataId))
1763 def testMutable(self):
1764 """Verify that an instance can be made mutable (unfrozen)."""
1765 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1766 initial_hash = hash(dataId)
1767 dataId.frozen = False
1768 self.assertEqual(initial_hash, hash(dataId))
1770 dataId["instrument"] = "foo"
1771 self.assertEqual(dataId["instrument"], "foo")
1772 self.assertNotEqual(initial_hash, hash(dataId))
1773 initial_hash = hash(dataId)
1775 del dataId["instrument"]
1776 self.assertTrue("instrument" not in dataId)
1777 self.assertNotEqual(initial_hash, hash(dataId))
1778 initial_hash = hash(dataId)
1780 assert sys.version_info[0] == 3
1781 if sys.version_info[1] >= 9:
1782 dataId |= dict(foo="bar")
1783 self.assertEqual(dataId["foo"], "bar")
1784 self.assertNotEqual(initial_hash, hash(dataId))
1785 initial_hash = hash(dataId)
1787 dataId.pop("visit")
1788 self.assertTrue("visit" not in dataId)
1789 self.assertNotEqual(initial_hash, hash(dataId))
1790 initial_hash = hash(dataId)
1792 dataId.popitem()
1793 self.assertTrue("physical_filter" not in dataId)
1794 self.assertNotEqual(initial_hash, hash(dataId))
1795 initial_hash = hash(dataId)
1797 dataId.update(dict(instrument="foo"))
1798 self.assertEqual(dataId["instrument"], "foo")
1799 self.assertNotEqual(initial_hash, hash(dataId))
1800 initial_hash = hash(dataId)
1803if __name__ == "__main__": 1803 ↛ 1804line 1803 didn't jump to line 1804, because the condition on line 1803 was never true
1804 unittest.main()