# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for ParquetFormatter.

Tests in this module are disabled unless pandas and pyarrow are importable.
"""

import os
import unittest
import tempfile
import shutil

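# If numpy, pandas, or pyarrow is unavailable, ``pyarrow`` is set to None and
# the skipUnless decorator below disables the whole test case.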
try:
    import numpy as np
    import pandas as pd
    import pyarrow.parquet
except ImportError:
    pyarrow = None

from lsst.daf.butler import Butler, DatasetType


TESTDIR = os.path.abspath(os.path.dirname(__file__))

@unittest.skipUnless(pyarrow is not None, "Cannot test ParquetFormatter without pyarrow.")
class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType("data", dimensions=(), storageClass="DataFrame",
                                       universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

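    # Both tests below round-trip a DataFrame through Butler.put/Butler.get,
    # read back the "columns" component, and select column subsets via the
    # "columns" read parameter.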
    def testSingleIndexDataFrame(self):
        columns1 = pd.Index(["a", "b", "c"])
        df1 = pd.DataFrame(np.random.randn(5, 3), index=np.arange(5, dtype=int), columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3))
        df4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        self.assertTrue(df1.loc[:, ["a"]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["d"]})

    def testMultiIndexDataFrame(self):
        columns1 = pd.MultiIndex.from_tuples(
            [
                ("g", "a"),
                ("g", "b"),
                ("g", "c"),
                ("r", "a"),
                ("r", "b"),
                ("r", "c"),
            ],
            names=["filter", "column"],
        )
        df1 = pd.DataFrame(np.random.randn(5, 6), index=np.arange(5, dtype=int), columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": {"filter": "g"}})
        self.assertTrue(df1.loc[:, ["g"]].equals(df3))
        df4 = self.butler.get(self.datasetType, dataId={},
                              parameters={"columns": {"filter": ["r"], "column": "a"}})
        self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["d"]})

if __name__ == "__main__":
    unittest.main()