# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
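
# A minimal usage sketch (illustrative comments only, not executed): instances
# hash by the string form of their contents and reject mutation while frozen,
# so they can stand in for hashable DataId mappings in the tests below.
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)           # usable wherever a hashable DataId is needed
#     data_id["visit"] = 53   # raises RuntimeError("DataIdForTest is frozen.")
#     data_id.frozen = False  # opt out of freezing; note the hash will change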


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
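
# For orientation, a sketch of the object this helper builds. The mapping of
# positional arguments to attributes is an assumption drawn from the keyword
# form used later in this file (MetricsExample(summary=..., data=..., output=...)):
#
#     metrics = makeExampleMetrics()
#     metrics.summary   # {"AM1": 5.2, "AM2": 30.6}
#     metrics.output    # {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}
#     metrics.data      # [563, 234, 456.7, 105, 2054, -1045]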


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
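
# Illustrative sketch (not executed): keys are wrapped in Named so that
# NamedKeyDict can look them up by name, and freeze() makes the result usable
# where a hashable DataCoordinate is expected.
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(data_id)  # stable for the frozen contents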


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that getManyURIs raises if the dataset has not been
            # put yet and prediction is disabled.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that getManyURIs predicts the URI if the dataset has
            # not been put yet and prediction is enabled.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a failure in this subtest does not trigger
                # a cascade of test failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
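
    # Callers unpack the starred return value directly, e.g.
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)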

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = tuple(["a", "b", 1])
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores (currently InMemory) do
                            # not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # No default setting.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1545 def testExplicitCacheDir(self):
1546 config_str = f"""
1547cached:
1548 root: '{self.root}'
1549 cacheable:
1550 metric0: true
1551 """
1552 cache_manager = self._make_cache_manager(config_str)
1554 # Look inside to check we do have a cache directory.
1555 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))
1557 self.assertCache(cache_manager)
1559 # Test that the cache directory is not marked temporary
1560 self.assertFalse(cache_manager.cache_directory.isTemporary)
1562 def assertCache(self, cache_manager):
1563 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1564 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1566 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1567 self.assertIsInstance(uri, ResourcePath)
1568 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1570 # Check presence in cache using ref and then using file extension.
1571 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1572 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1573 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1574 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1576 # Cached file should no longer exist but uncached file should be
1577 # unaffected.
1578 self.assertFalse(self.files[0].exists())
1579 self.assertTrue(self.files[1].exists())
1581 # Should find this file and it should be within the cache directory.
1582 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1583 self.assertTrue(found.exists())
1584 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1586 # Should not be able to find these in the cache.
1587 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1588 self.assertIsNone(found)
1589 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1590 self.assertIsNone(found)
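# A minimal sketch of the move_to_cache contract asserted above
# (illustrative, not the real DatastoreCacheManager logic): a cacheable
# file is moved into the cache directory and its new location returned;
# a non-cacheable file is left in place and None is returned.
def _move_to_cache_sketch(cache_dir, uri, cacheable):
    """Hypothetical helper, unused by the tests."""
    if not cacheable:
        return None
    target = cache_dir.join(uri.basename())
    target.transfer_from(uri, transfer="move")  # Move, not copy.
    return target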
1592 def testNoCache(self):
1593 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1594 for uri, ref in zip(self.files, self.refs):
1595 self.assertFalse(cache_manager.should_be_cached(ref))
1596 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1597 self.assertFalse(cache_manager.known_to_cache(ref))
1598 with cache_manager.find_in_cache(ref, ".txt") as found:
1599 self.assertIsNone(found, msg=f"{cache_manager}")
1601 def _expiration_config(self, mode: str, threshold: int) -> str:
1602 return f"""
1603cached:
1604 default: true
1605 expiry:
1606 mode: {mode}
1607 threshold: {threshold}
1608 cacheable:
1609 unused: true
1610 """
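# For reference, the string built above parses with plain YAML to a
# nested dict of the form
# {"cached": {"default": True, "expiry": {"mode": ..., "threshold": ...},
#  "cacheable": {"unused": True}}}, as this hypothetical helper
# (unused by the tests) demonstrates.
def _parsed_expiration_config_sketch(config_str):
    return yaml.safe_load(config_str)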
1612 def testCacheExpiryFiles(self):
1613 threshold = 2 # Keep at least 2 files.
1614 mode = "files"
1615 config_str = self._expiration_config(mode, threshold)
1617 cache_manager = self._make_cache_manager(config_str)
1619 # Check that an empty cache returns unknown for an arbitrary ref.
1620 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1622 # Should end with datasets: 2, 3, 4
1623 self.assertExpiration(cache_manager, 5, threshold + 1)
1624 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1626 # Check that we will not expire a file that is actively in use.
1627 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1628 self.assertIsNotNone(found)
1630 # Trigger cache expiration that should remove the file
1631 # we just retrieved. Should now have: 3, 4, 5
1632 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1633 self.assertIsNotNone(cached)
1635 # Cache should still report the expected file count.
1636 self.assertEqual(cache_manager.file_count, threshold + 1)
1638 # Add an additional entry to the cache.
1639 # Should now have 4, 5, 6.
1640 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1641 self.assertIsNotNone(cached)
1643 # Is the file still there?
1644 self.assertTrue(found.exists())
1646 # Can we read it?
1647 data = found.read()
1648 self.assertGreater(len(data), 0)
1650 # Outside the context manager the file should no longer exist.
1651 self.assertFalse(found.exists())
1653 # File count should not have changed.
1654 self.assertEqual(cache_manager.file_count, threshold + 1)
1656 # Dataset 2 was in the exempt directory, but because hardlinks
1657 # are used it was deleted from the main cache during the cache
1658 # expiry above, and so should no longer be found.
1659 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1660 self.assertIsNone(found)
1662 # And the one stored after it is also gone.
1663 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1664 self.assertIsNone(found)
1666 # But dataset 4 is present.
1667 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1668 self.assertIsNotNone(found)
1670 # Adding a new dataset to the cache should now delete it.
1671 cache_manager.move_to_cache(self.files[7], self.refs[7])
1673 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1674 self.assertIsNone(found)
1676 def testCacheExpiryDatasets(self):
1677 threshold = 2 # Keep 2 datasets.
1678 mode = "datasets"
1679 config_str = self._expiration_config(mode, threshold)
1681 cache_manager = self._make_cache_manager(config_str)
1682 self.assertExpiration(cache_manager, 5, threshold + 1)
1683 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1685 def testCacheExpiryDatasetsComposite(self):
1686 threshold = 2 # Keep 2 datasets.
1687 mode = "datasets"
1688 config_str = self._expiration_config(mode, threshold)
1690 cache_manager = self._make_cache_manager(config_str)
1692 n_datasets = 3
1693 for i in range(n_datasets):
1694 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1695 cached = cache_manager.move_to_cache(component_file, component_ref)
1696 self.assertIsNotNone(cached)
1697 self.assertTrue(cache_manager.known_to_cache(component_ref))
1698 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1699 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1701 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1703 # Write two new non-composite datasets and the number of files should drop.
1704 self.assertExpiration(cache_manager, 2, 5)
1706 def testCacheExpirySize(self):
1707 threshold = 55 # Each file is 10 bytes
1708 mode = "size"
1709 config_str = self._expiration_config(mode, threshold)
1711 cache_manager = self._make_cache_manager(config_str)
1712 self.assertExpiration(cache_manager, 10, 6)
1713 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1715 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1716 """Insert the datasets and then check the number retained."""
1717 for i in range(n_datasets):
1718 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1719 self.assertIsNotNone(cached)
1721 self.assertEqual(cache_manager.file_count, n_retained)
1723 # The oldest files should no longer be in the cache.
1724 for i in range(n_datasets):
1725 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1726 if i >= n_datasets - n_retained:
1727 self.assertIsInstance(found, ResourcePath)
1728 else:
1729 self.assertIsNone(found)
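# A minimal sketch of the count-based eviction that assertExpiration
# relies on (illustrative, not the real expiry code): with entries
# ordered oldest to newest, expiry keeps only the newest n_retained.
def _count_expiry_sketch(entries, n_retained):
    """Hypothetical helper, unused by the tests."""
    return entries[-n_retained:] if n_retained > 0 else []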
1731 def testCacheExpiryAge(self):
1732 threshold = 1 # Expire files older than 1 second.
1733 mode = "age"
1734 config_str = self._expiration_config(mode, threshold)
1736 cache_manager = self._make_cache_manager(config_str)
1737 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1739 # Insert 2 files, then sleep, then insert 4 more.
1740 for i in range(2):
1741 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1742 self.assertIsNotNone(cached)
1743 time.sleep(2.0)
1744 for j in range(4):
1745 i = 2 + j # Continue the counting
1746 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1747 self.assertIsNotNone(cached)
1749 # Only the files written after the sleep should exist.
1750 self.assertEqual(cache_manager.file_count, 4)
1751 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1752 self.assertIsNone(found)
1753 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1754 self.assertIsInstance(found, ResourcePath)
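# A minimal sketch (not the DatastoreCacheManager implementation) of the
# age-based expiry checked in testCacheExpiryAge: entries whose
# modification time is more than threshold seconds in the past are evicted.
def _age_expiry_sketch(mtimes, threshold, now):
    """Hypothetical helper returning the entries that would survive."""
    return {name for name, mtime in mtimes.items() if now - mtime <= threshold}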
1757class DatasetRefURIsTestCase(unittest.TestCase):
1758 """Tests for DatasetRefURIs."""
1760 def testSequenceAccess(self):
1761 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1762 uris = DatasetRefURIs()
1764 self.assertEqual(len(uris), 2)
1765 self.assertIsNone(uris[0])
1766 self.assertEqual(uris[1], {})
1768 primaryURI = ResourcePath("1/2/3")
1769 componentURI = ResourcePath("a/b/c")
1771 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1772 with self.assertRaises(TypeError):
1773 uris[0] = primaryURI
1774 with self.assertRaises(TypeError):
1775 uris[1] = {"foo": componentURI}
1777 # But the DatasetRefURIs values can be set by property name:
1778 uris.primaryURI = primaryURI
1779 uris.componentURIs = {"foo": componentURI}
1780 self.assertEqual(uris.primaryURI, primaryURI)
1781 self.assertEqual(uris[0], primaryURI)
1783 primary, components = uris
1784 self.assertEqual(primary, primaryURI)
1785 self.assertEqual(components, {"foo": componentURI})
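# A minimal, standalone sketch of the tuple-like behaviour verified above
# (illustrative; the real DatasetRefURIs is more complete): defining only
# __len__ and __getitem__ provides len(), indexing, iteration, and
# two-item unpacking, while item assignment raises TypeError because
# __setitem__ is never defined.
class _PairSketch:
    def __init__(self, primary=None, components=None):
        self.primary = primary
        self.components = components if components is not None else {}

    def __len__(self):
        return 2

    def __getitem__(self, index):
        return (self.primary, self.components)[index]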
1787 def testRepr(self):
1788 """Verify __repr__ output."""
1789 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
1790 self.assertEqual(
1791 repr(uris),
1792 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
1793 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
1794 )
1797class DataIdForTestTestCase(unittest.TestCase):
1798 """Tests for the DataIdForTest class."""
1800 def testImmutable(self):
1801 """Verify that an instance is immutable by default."""
1802 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1803 initial_hash = hash(dataId)
1805 with self.assertRaises(RuntimeError):
1806 dataId["instrument"] = "foo"
1808 with self.assertRaises(RuntimeError):
1809 del dataId["instrument"]
1811 if sys.version_info >= (3, 9):
1813 with self.assertRaises(RuntimeError):
1814 dataId |= dict(foo="bar")
1816 with self.assertRaises(RuntimeError):
1817 dataId.pop("instrument")
1819 with self.assertRaises(RuntimeError):
1820 dataId.popitem()
1822 with self.assertRaises(RuntimeError):
1823 dataId.update(dict(instrument="foo"))
1825 # Verify that the hash value has not changed.
1826 self.assertEqual(initial_hash, hash(dataId))
1828 def testMutable(self):
1829 """Verify that an instance can be made mutable (unfrozen)."""
1830 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1831 initial_hash = hash(dataId)
1832 dataId.frozen = False
1833 self.assertEqual(initial_hash, hash(dataId))
1835 dataId["instrument"] = "foo"
1836 self.assertEqual(dataId["instrument"], "foo")
1837 self.assertNotEqual(initial_hash, hash(dataId))
1838 initial_hash = hash(dataId)
1840 del dataId["instrument"]
1841 self.assertNotIn("instrument", dataId)
1842 self.assertNotEqual(initial_hash, hash(dataId))
1843 initial_hash = hash(dataId)
1845 if sys.version_info >= (3, 9):
1847 dataId |= dict(foo="bar")
1848 self.assertEqual(dataId["foo"], "bar")
1849 self.assertNotEqual(initial_hash, hash(dataId))
1850 initial_hash = hash(dataId)
1852 dataId.pop("visit")
1853 self.assertNotIn("visit", dataId)
1854 self.assertNotEqual(initial_hash, hash(dataId))
1855 initial_hash = hash(dataId)
1857 dataId.popitem()
1858 self.assertNotIn("physical_filter", dataId)
1859 self.assertNotEqual(initial_hash, hash(dataId))
1860 initial_hash = hash(dataId)
1862 dataId.update(dict(instrument="foo"))
1863 self.assertEqual(dataId["instrument"], "foo")
1864 self.assertNotEqual(initial_hash, hash(dataId))
1865 initial_hash = hash(dataId)
1868if __name__ == "__main__":
1869 unittest.main()