# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
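
# NOTE: The import block below, the class statements, the method signatures,
# and the __main__ guard were dropped by the coverage extract and have been
# reconstructed from the symbols used in this module.  Exact module paths,
# reconstructed names, and the elided per-class configuration attributes
# (configFile, uriScheme, ingestTransferModes, canIngest, ...) should be
# treated as assumptions, not as the original source.
import os
import shutil
import tempfile
import unittest

import yaml

import lsst.utils.tests
from lsst.daf.butler import (
    ButlerURI,
    DatasetTypeNotSupportedError,
    DatastoreConfig,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.core.utils import doImport
from lsst.daf.butler.formatters.yamlFormatter import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)

TESTDIR = os.path.dirname(__file__)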


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7])
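
# The three positional arguments above fill the components of MetricsExample;
# only the "output" component is read back by name in the tests below, and
# mapping it to the second (nested-mapping) argument is an assumption about
# the helper class.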
"""Specific error for transactions, to prevent misdiagnosing that might otherwise occur when a standard exception is used. """
"""Support routines for datastore testing"""

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
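

# A minimal sketch of how a concrete test case is expected to plug into
# DatastoreTestsBase.  The attribute names are taken from their uses in the
# tests below; the values shown here are illustrative assumptions only.
#
#     class ExamplePosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
#         configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
#         uriScheme = "file:"
#         isEphemeral = False
#         rootKeys = ("root",)
#         validationCanFail = True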
"""Some basic tests of a simple datastore."""

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])
"""Check that parameters are validated""" sc = self.storageClassFactory.getStorageClass("ThingOne") dimensions = self.universe.extract(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) datastore = self.makeDatastore() data = {1: 2, 3: 4} datastore.put(data, ref) newdata = datastore.get(ref) self.assertEqual(data, newdata) with self.assertRaises(KeyError): newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getUri(ref)
            self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            comp = "output"
            compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(comp), dimensions,
                                          sc.components[comp], dataId, id=ref.id)
            output = datastore.get(compRef)
            self.assertEqual(output, metricsOut.output)

            uri = datastore.getUri(compRef)
            self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

        # Keep the last storage class from the loop for the failure checks below
        storageClass = sc

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getUri(ref, predict=True)
        self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getUri(ref)
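
    # Taken together, this test pins down the core Datastore API exercised
    # throughout this module: put()/get() round-trip an object, exists()
    # reports presence, getUri() returns a URI in the configured scheme
    # (predict=True for yet-to-be-written datasets), and unsupported or
    # missing datasets raise DatasetTypeNotSupportedError / FileNotFoundError.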

    def testCompositePutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.assembler().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId, conform=False)
print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name)) datastore.put(compInfo.component, compRef)

                uri = datastore.getUri(compRef)
                self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite
            metricsOut = sc.assembler().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)
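
    # Composite round trip: the StorageClass assembler splits a composite
    # into named components for individual put()s, and assemble()
    # reconstitutes an equal object from the components read back.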

    def testRemove(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getUri(ref, predict=True)
        self.assertTrue(uri.endswith("#predicted"))

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics())
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]

        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)

        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")

        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            # URI
            uri = datastore.getUri(ref)
            self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.getUri(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) datastore.put(metrics, refBefore) with self.assertRaises(TransactionTestError): with datastore.transaction(): dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"} refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) datastore.put(metrics, refOuter) with datastore.transaction(): dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"} refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) datastore.put(metrics, refInner) # All datasets should exist for ref in (refBefore, refOuter, refInner): metricsOut = datastore.get(ref, parameters=None) self.assertEqual(metrics, metricsOut) raise TransactionTestError("This should roll back the transaction") # Dataset(s) inserted before the transaction should still exist metricsOut = datastore.get(refBefore, parameters=None) self.assertEqual(metrics, metricsOut) # But all datasets inserted during the (rolled back) transaction # should be gone with self.assertRaises(FileNotFoundError): datastore.get(refOuter) with self.assertRaises(FileNotFoundError): datastore.get(refInner)

    def runIngestTest(self, func, expectOutput=True):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)
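
    # runIngestTest is the shared driver for the ingest tests below: it
    # writes an example metrics object to a temporary YAML file and hands
    # (obj, path, ref) to the supplied check function.  The signature above
    # is reconstructed from those call sites and is an assumption.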
"""Test ingesting existing files with no transfer. """ datastore = self.makeDatastore()

        def succeed(obj, path, ref):
            """Ingest a file already in the datastore root."""
            # first move it into the root, and adjust the path accordingly
            path = shutil.copy(path, datastore.root)
            path = os.path.relpath(path, start=datastore.root)
            datastore.ingest(FileDataset(path=path, refs=ref), transfer=None)
            self.assertEqual(obj, datastore.get(ref))

        def failInputDoesNotExist(obj, path, ref):
            """Can't ingest files if we're given a bad path."""
            with self.assertRaises(FileNotFoundError):
                datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                 transfer=None)
            self.assertFalse(datastore.exists(ref))

        def failOutsideRoot(obj, path, ref):
            """Can't ingest files outside of datastore root."""
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=None)
            self.assertFalse(datastore.exists(ref))

        def failNotImplemented(obj, path, ref):
            with self.assertRaises(NotImplementedError):
                datastore.ingest(FileDataset(path=path, refs=ref), transfer=None)

        if None in self.ingestTransferModes:
            self.runIngestTest(failOutsideRoot)
            self.runIngestTest(failInputDoesNotExist)
            self.runIngestTest(succeed)
        else:
            self.runIngestTest(failNotImplemented)
"""Test ingesting existing files after transferring them. """ for mode in ("copy", "move", "hardlink", "symlink"): with self.subTest(mode=mode): datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref),
                                         transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)
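
    # Taken together, the two ingest tests pin down the transfer contract:
    # transfer=None ingests a file already under the datastore root in place,
    # while "copy", "move", "hardlink" and "symlink" transfer the file to the
    # template location first; unsupported modes must raise
    # NotImplementedError, and "move" consumes the source file (hence
    # expectOutput=False above).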
"""PosixDatastore specialization"""

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
"""Posix datastore tests but with checksums disabled."""
"""Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        info = datastore.getStoredItemInfo(ref)
        self.assertIsNone(info.checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        info = datastore.getStoredItemInfo(ref)
        self.assertIsNotNone(info.checksum)


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
"""Test that a failed formatter write does cleanup a partial file.""" metrics = makeExampleMetrics() datastore = self.makeDatastore()
storageClass = self.storageClassFactory.getStorageClass("StructuredData")
dimensions = self.universe.extract(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getUri(ref, predict=True)
        self.assertTrue(expectedUri.endswith(".yaml#predicted"),
                        f"Is there a file extension in {expectedUri}")

        # Convert to ButlerURI so we can extract the path component
        expectedUri = ButlerURI(expectedUri)
        expectedFile = expectedUri.path

        # Try a formatter that fails, and one that fails and leaves
        # a partial file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset; it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(os.path.exists(expectedFile), f"Check for existence of {expectedFile}")

                # Check that there is a directory
                self.assertTrue(os.path.exists(os.path.dirname(expectedFile)),
                                f"Check for existence of directory {os.path.dirname(expectedFile)}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(os.path.exists(expectedFile), f"Check for existence of {expectedFile}")
        datastore.remove(ref)
        self.assertFalse(os.path.exists(expectedFile), f"Check for existence of now removed {expectedFile}")
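
# The cleanup contract exercised above: a formatter failure during put() must
# never leave a partial dataset file on disk (though the containing directory
# may remain), so a later put() with a working formatter succeeds cleanly.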
"""PosixDatastore specialization"""
"""ChainedDatastore specialization using a POSIXDatastore"""
"""ChainedDatastore specialization using all InMemoryDatastore"""
"""Basic tests of constraints model of Datastores."""
"""Test constraints model. Assumes that each test class has the same constraints.""" metrics = makeExampleMetrics() datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check
        testfile = tempfile.NamedTemporaryFile()

        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            with self.subTest(datasetTypeName=datasetTypeName):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="symlink")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="symlink")
                        self.assertFalse(datastore.exists(ref))
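
    # The table of (datasetTypeName, storageClass, accepted) tuples above is
    # what encodes the constraints being tested: "metric2" is rejected with
    # StructuredData but accepted with StructuredDataJson, i.e. constraints
    # can match on storage class as well as on dataset type name.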
"""PosixDatastore specialization"""

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
"""InMemoryDatastore specialization"""
"""ChainedDatastore specialization using a POSIXDatastore and constraints at the ChainedDatstore """
"""ChainedDatastore specialization using a POSIXDatastore"""
"""ChainedDatastore specialization using all InMemoryDatastore"""
"""Test that a chained datastore can control constraints per-datastore even if child datastore would accept."""

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()
"""Test chained datastore constraints model.""" metrics = makeExampleMetrics() datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check
        testfile = tempfile.NamedTemporaryFile()

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1,
                                                      (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):
            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="symlink")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral currently means InMemory,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="symlink")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="symlink")
                    self.assertFalse(datastore.exists(ref))


if __name__ == "__main__":
    unittest.main()