# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        if sys.version_info < (3, 9):
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
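
# A minimal usage sketch of DataIdForTest (illustrative only; not executed
# by the test suite): instances hash like their contents and reject
# mutation unless ``frozen`` is explicitly set to False first.
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)           # works, unlike a plain dict
#     data_id["visit"] = 53   # raises RuntimeError: DataIdForTest is frozen.
#     data_id.frozen = False
#     data_id["visit"] = 53   # now allowed, but the hash value changes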


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
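
# The three positional arguments above populate what the tests below read
# back as the ``summary``, ``output`` and ``data`` attributes/components of
# MetricsExample (see the component accesses in testBasicPutGet and
# testDisassembly).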


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
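
# Illustrative sketch (not executed by the tests): FakeDataCoordinate wraps
# each plain dict key in Named so that the frozen result is hashable and
# can stand in for a DataCoordinate.
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     lookup = {dataId: "usable as a dictionary key"}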


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
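
# The concrete test cases further down (e.g. PosixDatastoreTestCase) supply
# the class attributes that these shared tests consult: configFile,
# uriScheme, ingestTransferModes, isEphemeral, rootKeys, validationCanFail
# and, where relevant, canIngest, canIngestNoTransferAuto and
# hasUnsupportedPut.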


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Check that getManyURIs raises if the dataset has not been
            # put and prediction is disabled.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Check that getManyURIs predicts the URIs if the dataset has
            # not been put and prediction is enabled.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if the datastore does not support trust mode.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the internal registry entry so that from now on the
            # datastore has to trust.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # The predicted URI should still point to the file, since forget
        # removes only the record, not the artifact.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do an in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    the mode is auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingesting a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name so that relative paths can be imported.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose the temp file based on the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose the temp file based on the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create a list of refs and a list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use the default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no cache directory set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # The cached file should no longer exist but the uncached file
        # should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
1541 def testCacheExpiryFiles(self):
1542 threshold = 2 # Keep at least 2 files.
1543 mode = "files"
1544 config_str = self._expiration_config(mode, threshold)
1546 cache_manager = self._make_cache_manager(config_str)
1548 # Check that an empty cache returns unknown for arbitrary ref
1549 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1551 # Should end with datasets: 2, 3, 4
1552 self.assertExpiration(cache_manager, 5, threshold + 1)
1553 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1555 # Check that we will not expire a file that is actively in use.
1556 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1557 self.assertIsNotNone(found)
1559 # Trigger cache expiration that should remove the file
1560 # we just retrieved. Should now have: 3, 4, 5
1561 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1562 self.assertIsNotNone(cached)
1564 # Cache should still report the standard file count.
1565 self.assertEqual(cache_manager.file_count, threshold + 1)
1567 # Add additional entry to cache.
1568 # Should now have 4, 5, 6
1569 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1570 self.assertIsNotNone(cached)
1572 # Is the file still there?
1573 self.assertTrue(found.exists())
1575 # Can we read it?
1576 data = found.read()
1577 self.assertGreater(len(data), 0)
1579 # Outside the context manager the file should no longer exist.
1580 self.assertFalse(found.exists())
1582 # File count should not have changed.
1583 self.assertEqual(cache_manager.file_count, threshold + 1)
1585 # Dataset 2 was in the exempt directory, but because hardlinks are
1586 # used it was deleted from the main cache during the expiry above,
1587 # so it should no longer be found.
1588 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1589 self.assertIsNone(found)
1591 # And the one stored after it is also gone.
1592 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1593 self.assertIsNone(found)
1595 # But dataset 4 is present.
1596 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1597 self.assertIsNotNone(found)
1599 # Adding a new dataset triggers expiry again; dataset 2 should still be absent.
1600 cache_manager.move_to_cache(self.files[7], self.refs[7])
1602 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1603 self.assertIsNone(found)
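# Editorial recap of the expiry timeline exercised above, derived from the
# assertions in this test (indices refer to self.files/self.refs):
#
#   move 0..4 -> expiry keeps 2, 3, 4 (threshold + 1 files)
#   open 2    -> a hardlink is placed in the exempt directory while in use
#   move 5    -> main cache holds 3, 4, 5; dataset 2 survives only as the
#                exempt hardlink
#   move 6    -> main cache holds 4, 5, 6; leaving the context deletes the
#                exempt copy of dataset 2
#   move 7    -> expiry runs again; dataset 2 is still absent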
1605 def testCacheExpiryDatasets(self):
1606 threshold = 2 # Keep 2 datasets.
1607 mode = "datasets"
1608 config_str = self._expiration_config(mode, threshold)
1610 cache_manager = self._make_cache_manager(config_str)
1611 self.assertExpiration(cache_manager, 5, threshold + 1)
1612 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1614 def testCacheExpiryDatasetsComposite(self):
1615 threshold = 2 # Keep 2 datasets.
1616 mode = "datasets"
1617 config_str = self._expiration_config(mode, threshold)
1619 cache_manager = self._make_cache_manager(config_str)
1621 n_datasets = 3
1622 for i in range(n_datasets):
1623 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1624 cached = cache_manager.move_to_cache(component_file, component_ref)
1625 self.assertIsNotNone(cached)
1626 self.assertTrue(cache_manager.known_to_cache(component_ref))
1627 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1628 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1630 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1632 # Write two new non-composite datasets; the number of cached files should drop.
1633 self.assertExpiration(cache_manager, 2, 5)
1635 def testCacheExpirySize(self):
1636 threshold = 55 # Each file is 10 bytes
1637 mode = "size"
1638 config_str = self._expiration_config(mode, threshold)
1640 cache_manager = self._make_cache_manager(config_str)
1641 self.assertExpiration(cache_manager, 10, 6)
1642 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1644 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1645 """Insert the datasets and then check the number retained."""
1646 for i in range(n_datasets):
1647 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1648 self.assertIsNotNone(cached)
1650 self.assertEqual(cache_manager.file_count, n_retained)
1652 # The oldest files should no longer be in the cache.
1653 for i in range(n_datasets):
1654 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1655 if i >= n_datasets - n_retained:
1656 self.assertIsInstance(found, ResourcePath)
1657 else:
1658 self.assertIsNone(found)
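# Worked example of the loop above (editorial): with n_datasets=5 and
# n_retained=3, refs 0 and 1 (i < 5 - 3) must have been expired, while
# refs 2, 3 and 4 must still resolve to a ResourcePath in the cache.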
1660 def testCacheExpiryAge(self):
1661 threshold = 1 # Expire files older than 1 second.
1662 mode = "age"
1663 config_str = self._expiration_config(mode, threshold)
1665 cache_manager = self._make_cache_manager(config_str)
1666 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1668 # Insert 2 files, then sleep, then insert 4 more.
1669 for i in range(2):
1670 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1671 self.assertIsNotNone(cached)
1672 time.sleep(2.0)
1673 for j in range(4):
1674 i = 2 + j # Continue the counting
1675 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1676 self.assertIsNotNone(cached)
1678 # Only the files written after the sleep should exist.
1679 self.assertEqual(cache_manager.file_count, 4)
1680 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1681 self.assertIsNone(found)
1682 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1683 self.assertIsInstance(found, ResourcePath)
1686class DatasetRefURIsTestCase(unittest.TestCase):
1687 """Tests for DatasetRefURIs."""
1689 def testSequenceAccess(self):
1690 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1691 uris = DatasetRefURIs()
1693 self.assertEqual(len(uris), 2)
1694 self.assertIsNone(uris[0])
1695 self.assertEqual(uris[1], {})
1697 primaryURI = ResourcePath("1/2/3")
1698 componentURI = ResourcePath("a/b/c")
1700 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1701 with self.assertRaises(TypeError):
1702 uris[0] = primaryURI
1703 with self.assertRaises(TypeError):
1704 uris[1] = {"foo": componentURI}
1706 # But the URIs can be set by property name:
1707 uris.primaryURI = primaryURI
1708 uris.componentURIs = {"foo": componentURI}
1709 self.assertEqual(uris.primaryURI, primaryURI)
1710 self.assertEqual(uris[0], primaryURI)
1712 primary, components = uris
1713 self.assertEqual(primary, primaryURI)
1714 self.assertEqual(components, {"foo": componentURI})
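# Editorial sketch: the tuple behaviour verified above is what allows
# callers to unpack the DatasetRefURIs returned by Datastore.getURIs
# directly. Hypothetical usage, assuming a populated datastore and ref:
#
#     primary, components = datastore.getURIs(ref)
#     if primary is not None:
#         print(primary.geturl())
#     for name, uri in components.items():
#         print(name, uri.geturl())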
1716 def testRepr(self):
1717 """Verify __repr__ output."""
1718 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
1719 self.assertEqual(
1720 repr(uris),
1721 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
1722 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
1723 )
1726class DataIdForTestTestCase(unittest.TestCase):
1727 """Tests for the DataIdForTest class."""
1729 def testImmutable(self):
1730 """Verify that an instance is immutable by default."""
1731 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1732 initial_hash = hash(dataId)
1734 with self.assertRaises(RuntimeError):
1735 dataId["instrument"] = "foo"
1737 with self.assertRaises(RuntimeError):
1738 del dataId["instrument"]
1740 assert sys.version_info[0] == 3
1741 if sys.version_info[1] >= 9:
1742 with self.assertRaises(RuntimeError):
1743 dataId |= dict(foo="bar")
1745 with self.assertRaises(RuntimeError):
1746 dataId.pop("instrument")
1748 with self.assertRaises(RuntimeError):
1749 dataId.popitem()
1751 with self.assertRaises(RuntimeError):
1752 dataId.update(dict(instrument="foo"))
1754 # Verify that the hash value has not changed.
1755 self.assertEqual(initial_hash, hash(dataId))
1757 def testMutable(self):
1758 """Verify that an instance can be made mutable (unfrozen)."""
1759 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1760 initial_hash = hash(dataId)
1761 dataId.frozen = False
1762 self.assertEqual(initial_hash, hash(dataId))
1764 dataId["instrument"] = "foo"
1765 self.assertEqual(dataId["instrument"], "foo")
1766 self.assertNotEqual(initial_hash, hash(dataId))
1767 initial_hash = hash(dataId)
1769 del dataId["instrument"]
1770 self.assertNotIn("instrument", dataId)
1771 self.assertNotEqual(initial_hash, hash(dataId))
1772 initial_hash = hash(dataId)
1774 assert sys.version_info[0] == 3
1775 if sys.version_info[1] >= 9:
1776 dataId |= dict(foo="bar")
1777 self.assertEqual(dataId["foo"], "bar")
1778 self.assertNotEqual(initial_hash, hash(dataId))
1779 initial_hash = hash(dataId)
1781 dataId.pop("visit")
1782 self.assertNotIn("visit", dataId)
1783 self.assertNotEqual(initial_hash, hash(dataId))
1784 initial_hash = hash(dataId)
1786 dataId.popitem()
1787 self.assertNotIn("physical_filter", dataId)
1788 self.assertNotEqual(initial_hash, hash(dataId))
1789 initial_hash = hash(dataId)
1791 dataId.update(dict(instrument="foo"))
1792 self.assertEqual(dataId["instrument"], "foo")
1793 self.assertNotEqual(initial_hash, hash(dataId))
1794 initial_hash = hash(dataId)
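# Editorial sketch: the hash semantics tested above are what make
# DataIdForTest usable as a dict key or set member. Hypothetical example,
# not part of the test suite (equal contents in the same insertion order
# hash identically because __hash__ uses str(self.data)):
#
#     key = DataIdForTest({"instrument": "dummy", "visit": 52})
#     results = {key: "metrics.yaml"}
#     same = DataIdForTest({"instrument": "dummy", "visit": 52})
#     assert results[same] == "metrics.yaml"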
1797if __name__ == "__main__":
1798 unittest.main()