Coverage for tests/test_datastore.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import os
23import unittest
24import shutil
25import yaml
26import tempfile
27import lsst.utils.tests
29from lsst.utils import doImport
31from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
32from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
33from lsst.daf.butler.formatters.yaml import YamlFormatter
35from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
36 BadNoWriteFormatter, MetricsExample, DummyRegistry)
# Absolute path of the directory holding this test module; used to locate
# the test configuration files under config/basic/.
TESTDIR = os.path.dirname(__file__)
def makeExampleMetrics(use_none=False):
    """Construct a `MetricsExample` with fixed test values.

    Parameters
    ----------
    use_none : `bool`, optional
        If `True` the list-like data component is `None` rather than a
        list of numbers.

    Returns
    -------
    metrics : `MetricsExample`
        Example metrics object for use in the tests below.
    """
    data = None if use_none else [563, 234, 456.7, 105, 2054, -1045]
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    # Working directory for datastore files; set by subclasses in setUp.
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(os.path.join(TESTDIR, "config/basic/storageClasses.yaml"))

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        cls.datastoreType = doImport(DatastoreConfig(cls.configFile)["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        # Remove the per-test datastore root, tolerating partial state.
        root = self.root
        if root is not None and os.path.exists(root):
            shutil.rmtree(root, ignore_errors=True)
class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    # Whether this datastore is expected to reject puts of dataset types
    # it does not support (exercised in testBasicPutGet).
    hasUnsupportedPut = True

    def testConfigRoot(self):
        """Check that setConfigRoot rewrites the root keys in the config."""
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        """Check that a datastore can be constructed from configuration."""
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        """Check validateConfiguration for both storage classes and refs."""
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        # "ThingTwo" is expected to fail validation where the datastore
        # supports validation failure at all.
        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        # An unknown read parameter must be rejected.
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        """Round-trip a metrics object through put/get for several
        storage classes, including component access and failure modes."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

            # Remember the last storage class for the failure checks below.
            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        # NOTE(review): this assignment is dead code -- it is immediately
        # overwritten by the loop variable on the next line.
        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about.
        """

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            # Unique dataset type per storage class to avoid file clashes.
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                # Clean up so the next storage class can reuse file names.
                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            # Disassemble outside the datastore, as registry would.
            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def testRemove(self):
        """Check remove deletes a dataset and that double-remove fails."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)
        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testTransfer(self):
        """Check a dataset can be transferred between two datastores."""
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        """Check that a failed transaction rolls back its puts while a
        completed transaction keeps them."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        """Check that a rollback undoes puts made in nested transactions
        but leaves pre-transaction datasets untouched."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        """Return a (metrics, ref) pair shared by the ingest tests."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        """Write a metrics file to a temporary path and call ``func`` with
        ``(metrics, path, ref)``; used as the driver for the ingest tests."""
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exist
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)
class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Scheme expected on URIs returned by getURI/getURIs.
    uriScheme = "file"
    # This datastore supports no-transfer ingest in "auto" mode.
    canIngestNoTransferAuto = True
    # Transfer modes exercised by the ingest tests.
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    # Config keys rewritten by setConfigRoot (checked in testConfigRoot).
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        metrics = makeExampleMetrics()
        ref = self.makeDatasetRef(
            "metric",
            self.universe.extract(("visit", "physical_filter")),
            self.storageClassFactory.getStorageClass("StructuredData"),
            {"instrument": "dummy", "visit": 0, "physical_filter": "V"},
            conform=False,
        )

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        self.assertIsNone(datastore.getStoredItemsInfo(ref)[0].checksum)

        # Remove, then put back with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        self.assertIsNotNone(datastore.getStoredItemsInfo(ref)[0].checksum)
class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Tests that a failed formatter write leaves no partial file behind."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try formatter that fails and formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")
class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    # In-memory datastore URIs use the "mem" scheme.
    uriScheme = "mem"
    # Accepts all puts, so the unsupported-put check is skipped.
    hasUnsupportedPut = False
    # Ingest is not supported at all.
    ingestTransferModes = ()
    isEphemeral = True
    # No root keys to rewrite in testConfigRoot.
    rootKeys = None
    validationCanFail = False
class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    # The chain accepts all puts, so the unsupported-put check is skipped.
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    # Roots of the child datastores rewritten by setConfigRoot.
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True
class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False
class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        # Each tuple is (dataset type name, storage class, whether the
        # configured constraints should accept it).
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))
class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    # File-backed datastores support file ingest in the constraints test.
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        # (the base setUp reads self.root, so the order matters).
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    # In-memory datastores do not accept ingest of files.
    canIngest = False
class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")
class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")
class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    # All children are in-memory, so file ingest is unsupported.
    canIngest = False
class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        # (the base setUp reads self.root, so the order matters).
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model.

        In the table below, ``accept`` is a per-child tuple recording which
        child datastores are expected to hold the dataset after ``put``;
        ``ingest`` records whether the chain as a whole accepts file ingest
        for that combination.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Empty files suitable for the ingest checks (JSON and YAML
        # variants).  Use context managers so the temporary files are
        # closed and removed even if an assertion fails part-way through
        # (previously cleanup relied on garbage collection).
        with tempfile.NamedTemporaryFile(suffix=".yaml") as testfile_y, \
                tempfile.NamedTemporaryFile(suffix=".json") as testfile_j:

            for typeName, dataId, sc, accept, ingest in (
                    ("metric", dataId1, sc1, (False, True, False), True),
                    ("metric2", dataId1, sc1, (False, False, False), False),
                    ("metric2", dataId2, sc1, (True, False, False), False),
                    ("metric33", dataId2, sc2, (True, True, False), True),
                    ("metric2", dataId1, sc2, (False, True, False), True)):

                # Choose different temp file depending on StorageClass
                testfile = testfile_j if sc.name.endswith("Json") else testfile_y

                with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                    ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                              conform=False)
                    if any(accept):
                        datastore.put(metrics, ref)
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of {ref} in datastore {childDatastore.name}")

                        datastore.remove(ref)

                        # Check that ingest works
                        if ingest:
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                            self.assertTrue(datastore.exists(ref))

                            # Check each datastore inside the chained datastore
                            for childDatastore, expected in zip(datastore.datastores, accept):
                                # Ephemeral datastores means InMemory at the moment
                                # and that does not accept ingest of files.
                                if childDatastore.isEphemeral:
                                    expected = False
                                self.assertEqual(childDatastore.exists(ref), expected,
                                                 f"Testing presence of ingested {ref} in datastore"
                                                 f" {childDatastore.name}")

                            datastore.remove(ref)
                        else:
                            with self.assertRaises(DatasetTypeNotSupportedError):
                                datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.put(metrics, ref)
                        self.assertFalse(datastore.exists(ref))

                        # Again with ingest
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))
if __name__ == "__main__":
    # Allow the tests to be run directly: python test_datastore.py
    unittest.main()