# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
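

# Illustrative usage sketch (not exercised by the tests themselves):
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(dataId)           # allowed: hashes str(self.data)
#     dataId["visit"] = 53   # raises RuntimeError("DataIdForTest is frozen.")
#
# Setting ``dataId.frozen = False`` re-enables mutation, at the cost of
# changing the hash.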


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
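

# For reference, a sketch of what the helper returns (this assumes
# MetricsExample's positional arguments are (summary, output, data), which is
# how the attributes are accessed elsewhere in this file):
#
#     m = makeExampleMetrics()
#     m.summary  # {"AM1": 5.2, "AM2": 30.6}
#     m.data     # [563, 234, 456.7, 105, 2054, -1045] (None if use_none=True)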


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
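

# Illustrative sketch: from_dict wraps each key in the frozen ``Named``
# dataclass so the result can stand in for a real DataCoordinate in these
# tests, e.g.:
#
#     dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(dataId)  # hashes frozenset(self.items())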


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if this datastore does not support trust mode.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore has no trustGetRequest attribute.")

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that only trust mode can find it.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            self.skipTest("In-memory datastore does not disassemble.")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
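
    # Note (illustrative): prepDeleteTest returns the datastore followed by
    # the stored refs, so callers unpack it either way:
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)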

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create storage classes for testing, including a composite.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastore currently means InMemory,
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use the default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # No default setting.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)
1559 def assertCache(self, cache_manager):
1560 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1561 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1563 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1564 self.assertIsInstance(uri, ResourcePath)
1565 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1567 # Check presence in cache using ref and then using file extension.
1568 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1569 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1570 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1571 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1573 # Cached file should no longer exist but uncached file should be
1574 # unaffected.
1575 self.assertFalse(self.files[0].exists())
1576 self.assertTrue(self.files[1].exists())
1578 # Should find this file and it should be within the cache directory.
1579 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1580 self.assertTrue(found.exists())
1581 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1583 # Should not be able to find these in the cache.
1584 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1585 self.assertIsNone(found)
1586 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1587 self.assertIsNone(found)
1589 def testNoCache(self):
1590 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1591 for uri, ref in zip(self.files, self.refs):
1592 self.assertFalse(cache_manager.should_be_cached(ref))
1593 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1594 self.assertFalse(cache_manager.known_to_cache(ref))
1595 with cache_manager.find_in_cache(ref, ".txt") as found:
1596 self.assertIsNone(found, msg=f"{cache_manager}")
1598 def _expiration_config(self, mode: str, threshold: int) -> str:
1599 return f"""
1600cached:
1601 default: true
1602 expiry:
1603 mode: {mode}
1604 threshold: {threshold}
1605 cacheable:
1606 unused: true
1607 """
1609 def testCacheExpiryFiles(self):
1610 threshold = 2 # Keep at least 2 files.
1611 mode = "files"
1612 config_str = self._expiration_config(mode, threshold)
1614 cache_manager = self._make_cache_manager(config_str)
1616 # Check that an empty cache returns unknown for an arbitrary ref.
1617 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1619 # Should end with datasets: 2, 3, 4
1620 self.assertExpiration(cache_manager, 5, threshold + 1)
1621 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1623 # Check that we will not expire a file that is actively in use.
1624 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1625 self.assertIsNotNone(found)
1627 # Trigger cache expiration that should remove the file
1628 # we just retrieved. Should now have: 3, 4, 5
1629 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1630 self.assertIsNotNone(cached)
1632 # Cache should still report the standard file count.
1633 self.assertEqual(cache_manager.file_count, threshold + 1)
1635 # Add additional entry to cache.
1636 # Should now have 4, 5, 6
1637 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1638 self.assertIsNotNone(cached)
1640 # Is the file still there?
1641 self.assertTrue(found.exists())
1643 # Can we read it?
1644 data = found.read()
1645 self.assertGreater(len(data), 0)
1647 # Outside the context manager the file should no longer exist.
1648 self.assertFalse(found.exists())
1650 # File count should not have changed.
1651 self.assertEqual(cache_manager.file_count, threshold + 1)
1653 # Dataset 2 was in the exempt directory, but because hardlinks are
1654 # used it was deleted from the main cache during the cache expiry
1655 # above, and so should no longer be found.
1656 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1657 self.assertIsNone(found)
1659 # And the one stored after it is also gone.
1660 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1661 self.assertIsNone(found)
1663 # But dataset 4 is present.
1664 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1665 self.assertIsNotNone(found)
1667 # Adding a new dataset to the cache should now delete it.
1668 cache_manager.move_to_cache(self.files[7], self.refs[7])
1670 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1671 self.assertIsNone(found)
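# Summarising the "files" mode exercised above: each move_to_cache() can
# trigger expiry of the oldest entries until threshold + 1 files remain,
# while a file checked out via find_in_cache() survives as a hardlink in
# an exempt directory until its context manager exits, even after its
# main cache entry has been expired. A rough model of the retention rule
# (not the daf_butler algorithm):
#
#     def expire(entries: list, threshold: int) -> None:
#         # entries are ordered oldest -> newest
#         while len(entries) > threshold + 1:
#             entries.pop(0)  # an exempt hardlink keeps in-use data alive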
1673 def testCacheExpiryDatasets(self):
1674 threshold = 2 # Keep 2 datasets.
1675 mode = "datasets"
1676 config_str = self._expiration_config(mode, threshold)
1678 cache_manager = self._make_cache_manager(config_str)
1679 self.assertExpiration(cache_manager, 5, threshold + 1)
1680 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1682 def testCacheExpiryDatasetsComposite(self):
1683 threshold = 2 # Keep 2 datasets.
1684 mode = "datasets"
1685 config_str = self._expiration_config(mode, threshold)
1687 cache_manager = self._make_cache_manager(config_str)
1689 n_datasets = 3
1690 for i in range(n_datasets):
1691 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1692 cached = cache_manager.move_to_cache(component_file, component_ref)
1693 self.assertIsNotNone(cached)
1694 self.assertTrue(cache_manager.known_to_cache(component_ref))
1695 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1696 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1698 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1700 # Write two new non-composite datasets and the number of files should drop.
1701 self.assertExpiration(cache_manager, 2, 5)
1703 def testCacheExpirySize(self):
1704 threshold = 55 # Each file is 10 bytes
1705 mode = "size"
1706 config_str = self._expiration_config(mode, threshold)
1708 cache_manager = self._make_cache_manager(config_str)
1709 self.assertExpiration(cache_manager, 10, 6)
1710 self.assertIn(f"{mode}={threshold}", str(cache_manager))
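# Worked numbers for the size mode: 10 files x 10 bytes = 100 bytes are
# inserted against a 55-byte threshold, and 6 files (60 bytes) are
# retained; that is the smallest count of newest files whose total size
# meets the threshold (5 files would be only 50 bytes), consistent with
# assertExpiration(cache_manager, 10, 6).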
1712 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1713 """Insert the datasets and then check the number retained."""
1714 for i in range(n_datasets):
1715 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1716 self.assertIsNotNone(cached)
1718 self.assertEqual(cache_manager.file_count, n_retained)
1720 # The oldest files should no longer be in the cache.
1721 for i in range(n_datasets):
1722 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1723 if i >= n_datasets - n_retained:
1724 self.assertIsInstance(found, ResourcePath)
1725 else:
1726 self.assertIsNone(found)
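# Worked example of the retention check above: with n_datasets=5 and
# n_retained=3, the condition i >= n_datasets - n_retained holds only for
# i in {2, 3, 4}, so refs 0 and 1 (the oldest) must be gone while refs
# 2-4 must still be served by find_in_cache().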
1728 def testCacheExpiryAge(self):
1729 threshold = 1 # Expire files older than 1 second.
1730 mode = "age"
1731 config_str = self._expiration_config(mode, threshold)
1733 cache_manager = self._make_cache_manager(config_str)
1734 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1736 # Insert 2 files, then sleep, then insert 4 more.
1737 for i in range(2):
1738 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1739 self.assertIsNotNone(cached)
1740 time.sleep(2.0)
1741 for j in range(4):
1742 i = 2 + j # Continue the counting
1743 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1744 self.assertIsNotNone(cached)
1746 # Only the files written after the sleep should exist.
1747 self.assertEqual(cache_manager.file_count, 4)
1748 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1749 self.assertIsNone(found)
1750 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1751 self.assertIsInstance(found, ResourcePath)
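# Timeline for the age mode: refs 0-1 are written, the test sleeps 2
# seconds, then refs 2-5 are written. With threshold=1 (second), the two
# pre-sleep files are older than the threshold by the time expiry runs,
# leaving only the 4 post-sleep files (file_count == 4).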
1754class DatasetRefURIsTestCase(unittest.TestCase):
1755 """Tests for DatasetRefURIs."""
1757 def testSequenceAccess(self):
1758 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1759 uris = DatasetRefURIs()
1761 self.assertEqual(len(uris), 2)
1762 self.assertIsNone(uris[0])
1763 self.assertEqual(uris[1], {})
1765 primaryURI = ResourcePath("1/2/3")
1766 componentURI = ResourcePath("a/b/c")
1768 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1769 with self.assertRaises(TypeError):
1770 uris[0] = primaryURI
1771 with self.assertRaises(TypeError):
1772 uris[1] = {"foo": componentURI}
1774 # But DatasetRefURIs can be set by property name:
1775 uris.primaryURI = primaryURI
1776 uris.componentURIs = {"foo": componentURI}
1777 self.assertEqual(uris.primaryURI, primaryURI)
1778 self.assertEqual(uris[0], primaryURI)
1780 primary, components = uris
1781 self.assertEqual(primary, primaryURI)
1782 self.assertEqual(components, {"foo": componentURI})
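# The tuple behaviour verified above lets callers unpack the result
# directly. A typical (hypothetical) call site, assuming a datastore
# method that returns DatasetRefURIs:
#
#     primary, components = datastore.getURIs(ref)
#     if primary is None:
#         # Disassembled composite: only per-component URIs exist.
#         for name, uri in components.items():
#             print(name, uri)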
1784 def testRepr(self):
1785 """Verify __repr__ output."""
1786 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
1787 self.assertEqual(
1788 repr(uris),
1789 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
1790 f"{{'comp': ResourcePath(\"{os.getcwd()}/a/b/c\")}})",
1791 )
1794class DataIdForTestTestCase(unittest.TestCase):
1795 """Tests for the DataIdForTest class."""
1797 def testImmutable(self):
1798 """Verify that an instance is immutable by default."""
1799 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1800 initial_hash = hash(dataId)
1802 with self.assertRaises(RuntimeError):
1803 dataId["instrument"] = "foo"
1805 with self.assertRaises(RuntimeError):
1806 del dataId["instrument"]
1808 assert sys.version_info[0] == 3
1809 if sys.version_info[1] >= 9:
1810 with self.assertRaises(RuntimeError):
1811 dataId |= dict(foo="bar")
1813 with self.assertRaises(RuntimeError):
1814 dataId.pop("instrument")
1816 with self.assertRaises(RuntimeError):
1817 dataId.popitem()
1819 with self.assertRaises(RuntimeError):
1820 dataId.update(dict(instrument="foo"))
1822 # Verify that the hash value has not changed.
1823 self.assertEqual(initial_hash, hash(dataId))
1825 def testMutable(self):
1826 """Verify that an instance can be made mutable (unfrozen)."""
1827 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1828 initial_hash = hash(dataId)
1829 dataId.frozen = False
1830 self.assertEqual(initial_hash, hash(dataId))
1832 dataId["instrument"] = "foo"
1833 self.assertEqual(dataId["instrument"], "foo")
1834 self.assertNotEqual(initial_hash, hash(dataId))
1835 initial_hash = hash(dataId)
1837 del dataId["instrument"]
1838 self.assertNotIn("instrument", dataId)
1839 self.assertNotEqual(initial_hash, hash(dataId))
1840 initial_hash = hash(dataId)
1842 assert sys.version_info[0] == 3
1843 if sys.version_info[1] >= 9:
1844 dataId |= dict(foo="bar")
1845 self.assertEqual(dataId["foo"], "bar")
1846 self.assertNotEqual(initial_hash, hash(dataId))
1847 initial_hash = hash(dataId)
1849 dataId.pop("visit")
1850 self.assertNotIn("visit", dataId)
1851 self.assertNotEqual(initial_hash, hash(dataId))
1852 initial_hash = hash(dataId)
1854 dataId.popitem()
1855 self.assertNotIn("physical_filter", dataId)
1856 self.assertNotEqual(initial_hash, hash(dataId))
1857 initial_hash = hash(dataId)
1859 dataId.update(dict(instrument="foo"))
1860 self.assertEqual(dataId["instrument"], "foo")
1861 self.assertNotEqual(initial_hash, hash(dataId))
1862 initial_hash = hash(dataId)
1865if __name__ == "__main__":
1866 unittest.main()