# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class usable as a DataId in tests.

    By default an instance is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
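
# A minimal usage sketch for the class above (illustrative only; the
# behavior shown follows directly from the methods defined there):
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)            # hashable, so usable anywhere a DataId key is needed
#     data_id["visit"] = 53    # raises RuntimeError: DataIdForTest is frozen.
#     data_id.frozen = False
#     data_id["visit"] = 53    # now allowed, but the hash value changes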


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
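
# Note: the three positional arguments above populate the summary, output and
# data attributes of MetricsExample respectively, as read back by the component
# tests below (e.g. metrics.summary, metrics.output, metrics.data).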


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
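
# Illustrative sketch (not exercised directly by the tests): keys are wrapped
# in Named so that, like a real DataCoordinate, lookups go through objects
# carrying a ``name`` attribute, and the mapping is hashable once frozen.
#
#     new = FakeDataCoordinate()
#     new[Named("visit")] = 638
#     hash(new)   # hashable via a frozenset of the items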


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start the datastore in its default configuration of using
            # the registry
            datastore.trustGetRequest = False

            # Create a storage class for testing with or without disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that only trust mode will let us
            # find the dataset again
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(
                                FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode
                            )
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create a standard storage class and a composite storage class
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores currently means InMemory,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now a config with no cache root set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should now apply since no root is
        # configured.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
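
    # Precedence implied by the checks above (assumed behavior of
    # DatastoreCacheManager, summarized here for readability):
    #   1. DAF_BUTLER_CACHE_DIRECTORY always wins.
    #   2. An explicit root in the config beats DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET.
    #   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET applies only when no root is
    #      configured, whether set via the environment directly or via
    #      set_fallback_cache_directory_if_unset().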

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
"""
1544 def testCacheExpiryFiles(self):
1545 threshold = 2 # Keep at least 2 files.
1546 mode = "files"
1547 config_str = self._expiration_config(mode, threshold)
1549 cache_manager = self._make_cache_manager(config_str)
1551 # Check that an empty cache returns unknown for arbitrary ref
1552 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1554 # Should end with datasets: 2, 3, 4
1555 self.assertExpiration(cache_manager, 5, threshold + 1)
1556 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1558 # Check that we will not expire a file that is actively in use.
1559 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1560 self.assertIsNotNone(found)
1562 # Trigger cache expiration that should remove the file
1563 # we just retrieved. Should now have: 3, 4, 5
1564 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1565 self.assertIsNotNone(cached)
1567 # Cache should still report the standard file count.
1568 self.assertEqual(cache_manager.file_count, threshold + 1)
1570 # Add additional entry to cache.
1571 # Should now have 4, 5, 6
1572 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1573 self.assertIsNotNone(cached)
1575 # Is the file still there?
1576 self.assertTrue(found.exists())
1578 # Can we read it?
1579 data = found.read()
1580 self.assertGreater(len(data), 0)
1582 # Outside context the file should no longer exist.
1583 self.assertFalse(found.exists())
1585 # File count should not have changed.
1586 self.assertEqual(cache_manager.file_count, threshold + 1)
1588 # Dataset 2 was in the exempt directory, but because hardlinks are
1589 # used it was deleted from the main cache during the expiry above and
1590 # so should no longer be found.
1591 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1592 self.assertIsNone(found)
1594 # And the one stored after it is also gone.
1595 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1596 self.assertIsNone(found)
1598 # But dataset 4 is present.
1599 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1600 self.assertIsNotNone(found)
1602 # Adding a new dataset to the cache should now expel dataset 4.
1603 cache_manager.move_to_cache(self.files[7], self.refs[7])
1605 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1606 self.assertIsNone(found)
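# Timeline of the "files"-mode expiry above (threshold=2 retains
# threshold + 1 = 3 files): five inserts leave datasets 2, 3, 4 cached;
# inserting 5 expels 2 (still readable from the exempt directory until the
# context exits); inserting 6 expels 3; inserting 7 expels 4.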
1608 def testCacheExpiryDatasets(self):
1609 threshold = 2 # Keep 2 datasets.
1610 mode = "datasets"
1611 config_str = self._expiration_config(mode, threshold)
1613 cache_manager = self._make_cache_manager(config_str)
1614 self.assertExpiration(cache_manager, 5, threshold + 1)
1615 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1617 def testCacheExpiryDatasetsComposite(self):
1618 threshold = 2 # Keep 2 datasets.
1619 mode = "datasets"
1620 config_str = self._expiration_config(mode, threshold)
1622 cache_manager = self._make_cache_manager(config_str)
1624 n_datasets = 3
1625 for i in range(n_datasets):
1626 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1627 cached = cache_manager.move_to_cache(component_file, component_ref)
1628 self.assertIsNotNone(cached)
1629 self.assertTrue(cache_manager.known_to_cache(component_ref))
1630 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1631 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1633 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1635 # Write two new non-composite datasets; the number of files should drop.
1636 self.assertExpiration(cache_manager, 2, 5)
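# The arithmetic here is partly inferred: three 3-component composites
# contribute 9 files, of which 6 (two composites) survive datasets-mode
# expiry; the two plain writes then presumably leave one composite (3 files)
# plus the 2 new datasets, matching the 5 retained files asserted above.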
1638 def testCacheExpirySize(self):
1639 threshold = 55 # Each file is 10 bytes
1640 mode = "size"
1641 config_str = self._expiration_config(mode, threshold)
1643 cache_manager = self._make_cache_manager(config_str)
1644 self.assertExpiration(cache_manager, 10, 6)
1645 self.assertIn(f"{mode}={threshold}", str(cache_manager))
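# With 10-byte files and threshold=55, the cache settles at 6 of the 10
# inserted files (60 bytes), echoing the one-extra-entry slack seen in the
# files-mode test (threshold=2 retaining 3 files).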
1647 def assertExpiration(self, cache_manager, n_datasets, n_retained):
1648 """Insert the datasets and then check the number retained."""
1649 for i in range(n_datasets):
1650 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1651 self.assertIsNotNone(cached)
1653 self.assertEqual(cache_manager.file_count, n_retained)
1655 # The oldest files should no longer be in the cache.
1656 for i in range(n_datasets):
1657 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1658 if i >= n_datasets - n_retained:
1659 self.assertIsInstance(found, ResourcePath)
1660 else:
1661 self.assertIsNone(found)
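# Worked example: with n_datasets=5 and n_retained=3, datasets 0 and 1 have
# been expired (find_in_cache yields None) while datasets 2, 3 and 4 are
# still resolved as ResourcePath instances.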
1663 def testCacheExpiryAge(self):
1664 threshold = 1  # Expire files older than 1 second.
1665 mode = "age"
1666 config_str = self._expiration_config(mode, threshold)
1668 cache_manager = self._make_cache_manager(config_str)
1669 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1671 # Insert 2 files, then sleep, then insert 4 more.
1672 for i in range(2):
1673 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1674 self.assertIsNotNone(cached)
1675 time.sleep(2.0)
1676 for j in range(4):
1677 i = 2 + j  # Continue counting from the first loop.
1678 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1679 self.assertIsNotNone(cached)
1681 # Only the files written after the sleep should exist.
1682 self.assertEqual(cache_manager.file_count, 4)
1683 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1684 self.assertIsNone(found)
1685 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1686 self.assertIsInstance(found, ResourcePath)
1689class DatasetRefURIsTestCase(unittest.TestCase):
1690 """Tests for DatasetRefURIs."""
1692 def testSequenceAccess(self):
1693 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1694 uris = DatasetRefURIs()
1696 self.assertEqual(len(uris), 2)
1697 self.assertIsNone(uris[0])
1698 self.assertEqual(uris[1], {})
1700 primaryURI = ResourcePath("1/2/3")
1701 componentURI = ResourcePath("a/b/c")
1703 # Affirm that DatasetRefURIs does not support MutableSequence functions.
1704 with self.assertRaises(TypeError):
1705 uris[0] = primaryURI
1706 with self.assertRaises(TypeError):
1707 uris[1] = {"foo": componentURI}
1709 # But DatasetRefURIs values can be set by property name:
1710 uris.primaryURI = primaryURI
1711 uris.componentURIs = {"foo": componentURI}
1712 self.assertEqual(uris.primaryURI, primaryURI)
1713 self.assertEqual(uris[0], primaryURI)
1715 primary, components = uris
1716 self.assertEqual(primary, primaryURI)
1717 self.assertEqual(components, {"foo": componentURI})
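# In calling code this duality supports both access styles, e.g. (sketch;
# assumes an API such as Butler.getURIs that returns a DatasetRefURIs):
#   primary, components = butler.getURIs(ref)  # tuple-style unpacking
#   butler.getURIs(ref).primaryURI  # named attribute access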
1719 def testRepr(self):
1720 """Verify __repr__ output."""
1721 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
1722 self.assertEqual(
1723 repr(uris),
1724 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
1725 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
1726 )
1729class DataIdForTestTestCase(unittest.TestCase):
1730 """Tests for the DataIdForTest class."""
1732 def testImmutable(self):
1733 """Verify that an instance is immutable by default."""
1734 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1735 initial_hash = hash(dataId)
1737 with self.assertRaises(RuntimeError):
1738 dataId["instrument"] = "foo"
1740 with self.assertRaises(RuntimeError):
1741 del dataId["instrument"]
1743 assert sys.version_info[0] == 3
1744 if sys.version_info[1] >= 9:
1745 with self.assertRaises(RuntimeError):
1746 dataId |= dict(foo="bar")
1748 with self.assertRaises(RuntimeError):
1749 dataId.pop("instrument")
1751 with self.assertRaises(RuntimeError):
1752 dataId.popitem()
1754 with self.assertRaises(RuntimeError):
1755 dataId.update(dict(instrument="foo"))
1757 # Verify that the hash value has not changed.
1758 self.assertEqual(initial_hash, hash(dataId))
1760 def testMutable(self):
1761 """Verify that an instance can be made mutable (unfrozen)."""
1762 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
1763 initial_hash = hash(dataId)
1764 dataId.frozen = False
1765 self.assertEqual(initial_hash, hash(dataId))
1767 dataId["instrument"] = "foo"
1768 self.assertEqual(dataId["instrument"], "foo")
1769 self.assertNotEqual(initial_hash, hash(dataId))
1770 initial_hash = hash(dataId)
1772 del dataId["instrument"]
1773 self.assertTrue("instrument" not in dataId)
1774 self.assertNotEqual(initial_hash, hash(dataId))
1775 initial_hash = hash(dataId)
1777 assert sys.version_info[0] == 3
1778 if sys.version_info[1] >= 9:
1779 dataId |= dict(foo="bar")
1780 self.assertEqual(dataId["foo"], "bar")
1781 self.assertNotEqual(initial_hash, hash(dataId))
1782 initial_hash = hash(dataId)
1784 dataId.pop("visit")
1785 self.assertTrue("visit" not in dataId)
1786 self.assertNotEqual(initial_hash, hash(dataId))
1787 initial_hash = hash(dataId)
1789 dataId.popitem()
1790 self.assertTrue("physical_filter" not in dataId)
1791 self.assertNotEqual(initial_hash, hash(dataId))
1792 initial_hash = hash(dataId)
1794 dataId.update(dict(instrument="foo"))
1795 self.assertEqual(dataId["instrument"], "foo")
1796 self.assertNotEqual(initial_hash, hash(dataId))
1797 initial_hash = hash(dataId)
1800if __name__ == "__main__":
1801 unittest.main()