Coverage for tests/test_parquet.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Tests in this module are disabled unless pandas and pyarrow are importable."""
except ImportError: pyarrow = None
"""Tests for ParquetFormatter, using PosixDatastore. """
"""Create a new butler root for each test.""" self.root = tempfile.mkdtemp(dir=TESTDIR) Butler.makeRepo(self.root) self.butler = Butler(self.root, run="test_run") # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType("data", dimensions=(), storageClass="DataFrame", universe=self.butler.registry.dimensions) self.butler.registry.registerDatasetType(self.datasetType)
if os.path.exists(self.root): shutil.rmtree(self.root, ignore_errors=True)
columns1 = pd.Index(["a", "b", "c"]) df1 = pd.DataFrame(np.random.randn(5, 3), index=np.arange(5, dtype=int), columns=columns1) self.butler.put(df1, self.datasetType, dataId={}) # Read the whole DataFrame. df2 = self.butler.get(self.datasetType, dataId={}) self.assertTrue(df1.equals(df2)) # Read just the column descriptions. columns2 = self.butler.get(f"{self.datasetType.name}.columns", dataId={}) self.assertTrue(df1.columns.equals(columns2)) # Read just some columns a few different ways. df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]}) self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3)) df4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"}) self.assertTrue(df1.loc[:, ["a"]].equals(df4)) # Passing an unrecognized column should be a ValueError. with self.assertRaises(ValueError): self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["d"]})
columns1 = pd.MultiIndex.from_tuples( [ ("g", "a"), ("g", "b"), ("g", "c"), ("r", "a"), ("r", "b"), ("r", "c"), ], names=["filter", "column"], ) df1 = pd.DataFrame(np.random.randn(5, 6), index=np.arange(5, dtype=int), columns=columns1) self.butler.put(df1, self.datasetType, dataId={}) # Read the whole DataFrame. df2 = self.butler.get(self.datasetType, dataId={}) self.assertTrue(df1.equals(df2)) # Read just the column descriptions. columns2 = self.butler.get(f"{self.datasetType.name}.columns", dataId={}) self.assertTrue(df1.columns.equals(columns2)) # Read just some columns a few different ways. df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": {"filter": "g"}}) self.assertTrue(df1.loc[:, ["g"]].equals(df3)) df4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": {"filter": ["r"], "column": "a"}}) self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4)) # Passing an unrecognized column should be a ValueError. with self.assertRaises(ValueError): self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["d"]})
unittest.main()