Coverage for tests/test_datastore.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import os
23import unittest
24import shutil
25import yaml
26import tempfile
27import lsst.utils.tests
29from lsst.utils import doImport
31from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
32from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
33from lsst.daf.butler.formatters.yaml import YamlFormatter
35from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
36 BadNoWriteFormatter, MetricsExample, DummyRegistry)
39TESTDIR = os.path.dirname(__file__)
def makeExampleMetrics(use_none=False):
    """Return a `MetricsExample` populated with fixed test values.

    Parameters
    ----------
    use_none : `bool`, optional
        If `True` the array component is `None` instead of a list of
        numbers, allowing tests to exercise `None`-valued components.
    """
    data_array = None if use_none else [563, 234, 456.7, 105, 2054, -1045]
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    return MetricsExample(summary, output, data_array)
class TransactionTestError(Exception):
    """Error raised deliberately inside transaction tests.

    Using a dedicated exception type prevents misdiagnosing failures
    that might otherwise occur when a standard exception is used.
    """
class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Shared fixtures and support routines for the datastore test suites."""

    # Filesystem root for the datastore under test; subclasses that write
    # to disk assign a temporary directory to this in setUp().
    root = None

    @classmethod
    def setUpClass(cls):
        """Load storage classes and resolve the datastore class once."""
        # The storage class definitions are common to every datastore
        # exercised by these tests.
        factory = StorageClassFactory()
        factory.addFromConfig(os.path.join(TESTDIR, "config/basic/storageClasses.yaml"))
        cls.storageClassFactory = factory

        # Derive the datastore class from the configuration file itself
        # rather than assuming a constructor name here.
        cls.datastoreType = doImport(DatastoreConfig(cls.configFile)["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        """Initialize per-test datastore helper state."""
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        """Remove any on-disk root created for this test."""
        if self.root is None:
            return
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    # Whether the datastore under test is expected to reject a put of a
    # dataset type it does not support; overridden by subclasses.
    hasUnsupportedPut = True

    def testConfigRoot(self):
        """Check that setConfigRoot rewrites the configured root keys."""
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        # rootKeys is None for datastores with no filesystem root.
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        """Check that a datastore can be constructed from configuration."""
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        """Check validateConfiguration on storage classes and dataset refs."""
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        # An unknown read parameter must be rejected with KeyError.
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        """Round-trip put/get/exists/getURI for several storage classes."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

            # Capture the last storage class for the unsupported-put and
            # missing-file checks below (the loop variable sc is reassigned
            # inside the hasUnsupportedPut branch).
            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        # NOTE(review): this assignment is dead -- the for statement on the
        # next line immediately rebinds comp.
        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about.
        """

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        # NOTE(review): manual counter; enumerate(..., start=1) would be
        # the idiomatic form.
        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self):
        """Put one dataset and verify it; return (datastore, ref) for the
        deletion tests below."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        return datastore, ref

    def testRemove(self):
        """Check remove() deletes the artifact and a second remove fails."""
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        """Check forget() drops the record but leaves the artifact on disk."""
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        """Check that a dataset can be transferred between two datastores."""
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        """Check that puts survive a committed transaction and are rolled
        back by a failed one."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        """Check that a rollback of an outer transaction also undoes puts
        made in a nested inner transaction."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        """Return (metrics, ref) suitable for the ingest tests."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        """Write a metrics YAML file to a temporary path and invoke
        ``func(metrics, path, ref)`` against it."""
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exist
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    # "move" consumes the input file, so do not expect it
                    # to remain afterwards.
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)
class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Local POSIX datastore hands out file URIs.
    uriScheme = "file"
    # Files already inside the root can be ingested with transfer="auto".
    canIngestNoTransferAuto = True
    # All transfer modes are expected to work for this datastore.
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    # Configuration key(s) rewritten by setConfigRoot.
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove, then put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)
class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Check that failed formatter writes leave no partial files behind."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try formatter that fails and formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                # NOTE(review): "dir" shadows the builtin of the same name;
                # harmless here but worth renaming on a future pass.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")
class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    # Datasets live in memory and are referenced by mem-scheme URIs.
    uriScheme = "mem"
    # Skip the unsupported-put check for this datastore.
    hasUnsupportedPut = False
    # No ingest is possible into an ephemeral datastore.
    ingestTransferModes = ()
    isEphemeral = True
    # There is no filesystem root to rewrite in the configuration.
    rootKeys = None
    validationCanFail = False
class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    # In-place (None/"move") ingest is not supported by the chain.
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    # Root keys for the child datastores at indices 1 and 2 of the chain.
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True
class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    # A chain of ephemeral stores has nothing to fail validation on.
    validationCanFail = False
class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants).
        # Close the handles deterministically at the end of the test instead
        # of relying on garbage collection: NamedTemporaryFile deletes the
        # file on close, and an unclosed handle raises ResourceWarning.
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        self.addCleanup(testfile_y.close)
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        self.addCleanup(testfile_j.close)

        # Each tuple is (dataset type name, storage class, whether the
        # configured constraints should accept the combination).
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))
class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    # Butler datastore configuration with constraints enabled.
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    # A POSIX datastore supports file ingest.
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        # (the base-class tearDown presumably removes self.root — defined
        # outside this view).
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    # Butler datastore configuration with constraints enabled.
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    # In-memory datastores do not support file ingest.
    canIngest = False
class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")
class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    # Same expectations as the POSIX case; only the configuration differs.
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")
class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    # A chain of in-memory datastores still cannot ingest files.
    canIngest = False
class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Each case: (datasetTypeName, dataId, storageClass,
        #             per-child-datastore acceptance flags, ingest expected).
        cases = (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        )

        # Write empty files suitable for ingest check (JSON and YAML
        # variants).  Context managers guarantee the temporary files (and
        # their descriptors) are cleaned up even if an assertion fails
        # part-way through; previously they were closed only when the
        # objects were garbage collected.
        with tempfile.NamedTemporaryFile(suffix=".yaml") as testfile_y, \
                tempfile.NamedTemporaryFile(suffix=".json") as testfile_j:
            for typeName, dataId, sc, accept, ingest in cases:
                # Choose different temp file depending on StorageClass
                testfile = testfile_j if sc.name.endswith("Json") else testfile_y

                with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                    ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                              conform=False)
                    if any(accept):
                        datastore.put(metrics, ref)
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of {ref} in datastore {childDatastore.name}")

                        datastore.remove(ref)

                        # Check that ingest works
                        if ingest:
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                            self.assertTrue(datastore.exists(ref))

                            # Check each datastore inside the chained datastore
                            for childDatastore, expected in zip(datastore.datastores, accept):
                                # Ephemeral datastores means InMemory at the moment
                                # and that does not accept ingest of files.
                                if childDatastore.isEphemeral:
                                    expected = False
                                self.assertEqual(childDatastore.exists(ref), expected,
                                                 f"Testing presence of ingested {ref} in datastore"
                                                 f" {childDatastore.name}")

                            datastore.remove(ref)
                        else:
                            with self.assertRaises(DatasetTypeNotSupportedError):
                                datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.put(metrics, ref)
                        self.assertFalse(datastore.exists(ref))

                        # Again with ingest
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))
# Entry point: allow this test module to be executed directly.
if __name__ == "__main__":
    unittest.main()