# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import unittest
import shutil
import yaml
import tempfile
import lsst.utils.tests

from lsst.utils import doImport

from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler import (DatastoreCacheManager, DatastoreDisabledCacheManager,
                             DatastoreCacheManagerConfig, Config, ButlerURI)

from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
                                   BadNoWriteFormatter, MetricsExample, DummyRegistry)


TESTDIR = os.path.dirname(__file__)
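

# Convenience constructor for the MetricsExample test object: a summary
# dict, an output dict and an optional data list (None when use_none is
# set, so that round-tripping a None component can be tested below).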
def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass
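

# Concrete subclasses must provide a configFile attribute; the test case
# classes below also define flags such as uriScheme, ingestTransferModes,
# isEphemeral, rootKeys and validationCanFail that the shared tests consult.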
class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
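    # Overridden (to False) by datastores, such as the in-memory datastore,
    # that will accept a put of any storage class.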

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about.
        """
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create the storage class for this iteration, testing with
            # or without disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        return datastore, ref

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)
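
    # Unlike remove(), forget() only drops the datastore's internal records;
    # the artifact itself is left in place (checked below via the predicted
    # URI).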
    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first copy it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 2
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId,
                                         conform=False) for n in range(n_datasets)]

        root_uri = ButlerURI(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create empty files
        for uri in self.files:
            uri.write(b"")

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
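
    # The cache manager is configured from a small YAML fragment: a "cached"
    # section with a "root" (null means use a temporary directory) and a
    # "cacheable" mapping of dataset type name to boolean.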
    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory,
                         ButlerURI(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        found = cache_manager.find_in_cache(self.refs[0], ".txt")
        self.assertTrue(found.exists())
        self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        self.assertIsNone(cache_manager.find_in_cache(self.refs[0], ".fits"))
        self.assertIsNone(cache_manager.find_in_cache(self.refs[1], ".fits"))

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertIsNone(cache_manager.find_in_cache(ref, ".txt"))


if __name__ == "__main__":
    unittest.main()