Coverage for tests/test_transformObject.py: 22%

86 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-15 03:35 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import os 

23import unittest 

24import pandas as pd 

25import numpy as np 

26 

27import lsst.utils.tests 

28 

29import pyarrow as pa 

30import pyarrow.parquet as pq 

31from lsst.pipe.tasks.parquetTable import MultilevelParquetTable 

32from lsst.pipe.tasks.functors import HsmFwhm, Column 

33from lsst.pipe.tasks.postprocess import TransformObjectCatalogTask, TransformObjectCatalogConfig 

34 

35ROOT = os.path.abspath(os.path.dirname(__file__)) 

36 

37 

def setup_module(module):
    """Module-level setup hook that runs ``lsst.utils.tests.init()``.

    Parameters
    ----------
    module : module
        Not used by the body; accepted because the hook is called with the
        module object (presumably by pytest -- confirm against the runner
        in use).
    """
    lsst.utils.tests.init()

40 

41 

class TransformObjectCatalogTestCase(unittest.TestCase):
    """Run TransformObjectCatalogTask on a parquet table built from the
    packaged CSV test data and check the layout of the resulting DataFrame.
    """

    def setUp(self):
        """Build the multilevel parquet input and the data ID shared by all tests."""
        # Note that this test input includes HSC-G, HSC-R, and HSC-I data
        csvPath = os.path.join(ROOT, 'data', 'test_multilevel_parq.csv.gz')
        inputFrame = pd.read_csv(csvPath, header=[0, 1, 2], index_col=0)
        with lsst.utils.tests.getTempFilePath('*.parq') as filename:
            # Round-trip through an on-disk parquet file, since the table
            # wrapper is constructed from a filename.
            arrowTable = pa.Table.from_pandas(inputFrame)
            pq.write_table(arrowTable, filename)
            self.parq = MultilevelParquetTable(filename)

        self.dataId = {"tract": 9615, "patch": "4,4"}

    def testNullFilter(self):
        """Columns are created for every requested band, even for bands that
        are absent from the input data.
        """
        config = TransformObjectCatalogConfig()
        config.camelCase = True
        # Request y-band columns although the input has none, and leave out
        # g band although the input does contain it.
        config.outputBands = ["r", "i", "y"]
        # Arbitrarily choose a boolean flag column to be "good"
        config.goodFlags = ['GoodFlagColumn']
        task = TransformObjectCatalogTask(config=config)

        # One float column, one integer column, one good flag and one bad
        # flag. Which input columns back them is irrelevant; only the types
        # matter.
        functors = {
            'FloatColumn': HsmFwhm(dataset='meas'),
            'IntColumn': Column('base_InputCount_value', dataset='meas'),
            'GoodFlagColumn': Column('slot_GaussianFlux_flag', dataset='meas'),
            'BadFlagColumn': Column('slot_Centroid_flag', dataset='meas'),
        }
        result = task.run(self.parq, funcs=functors, dataId=self.dataId)
        self.assertIsInstance(result, pd.DataFrame)

        # Every requested band must carry all four per-band columns.
        perBandNames = ('FloatColumn', 'IntColumn', 'BadFlagColumn', 'GoodFlagColumn')
        for band in config.outputBands:
            for name in perBandNames:
                self.assertIn(f"{band}{name}", result.columns)

        # Check that the default filling has worked: the missing y band is
        # null / negative / flag-specific, while the present i band holds
        # real values, and the excluded g band is absent.
        self.assertNotIn('gFloatColumn', result.columns)
        self.assertTrue(result['yFloatColumn'].isnull().all())
        self.assertTrue(result['iFloatColumn'].notnull().all())
        self.assertTrue(np.all(result['iIntColumn'].values >= 0))
        self.assertTrue(np.all(result['yIntColumn'].values < 0))
        self.assertTrue(np.all(~result['yGoodFlagColumn'].values))
        self.assertTrue(np.all(result['yBadFlagColumn'].values))

        # Check that the datatypes are preserved, for both a band that was
        # present in the input (i) and one that was filled in (y).
        expectedTypes = (
            ('FloatColumn', np.float64),
            ('IntColumn', np.int64),
            ('GoodFlagColumn', np.bool_),
            ('BadFlagColumn', np.bool_),
        )
        for name, scalarType in expectedTypes:
            for band in ('i', 'y'):
                self.assertEqual(result[f"{band}{name}"].dtype, np.dtype(scalarType))

    def testUnderscoreColumnFormat(self):
        """With camelCase disabled, per-band columns are named ``<band>_<name>``."""
        config = TransformObjectCatalogConfig()
        config.outputBands = ["g", "r", "i"]
        config.camelCase = False
        task = TransformObjectCatalogTask(config=config)
        functors = {'Fwhm': HsmFwhm(dataset='meas')}
        result = task.run(self.parq, funcs=functors, dataId=self.dataId)
        self.assertIsInstance(result, pd.DataFrame)
        for band in config.outputBands:
            self.assertIn(f"{band}_Fwhm", result.columns)

    def testMultilevelOutput(self):
        """With multilevelOutput enabled, the result keeps a multilevel column
        index, keyed by band at the top level.
        """
        config = TransformObjectCatalogConfig()
        config.outputBands = ["r", "i"]
        config.multilevelOutput = True
        task = TransformObjectCatalogTask(config=config)
        functors = {'Fwhm': HsmFwhm(dataset='meas')}
        result = task.run(self.parq, funcs=functors, dataId=self.dataId)
        self.assertIsInstance(result, pd.DataFrame)
        # g is present in the input but was not requested.
        self.assertNotIn('g', result)
        for band in config.outputBands:
            bandFrame = result[band]
            self.assertIsInstance(bandFrame, pd.DataFrame)
            self.assertIn('Fwhm', bandFrame.columns)

    def testNoOutputBands(self):
        """All the input bands should go into the output, and nothing else.
        """
        config = TransformObjectCatalogConfig()
        config.multilevelOutput = True
        task = TransformObjectCatalogTask(config=config)
        functors = {'Fwhm': HsmFwhm(dataset='meas')}
        result = task.run(self.parq, funcs=functors, dataId=self.dataId)
        self.assertIsInstance(result, pd.DataFrame)
        # Output is keyed by short band name, not by the physical filter name.
        self.assertNotIn('HSC-G', result)
        for band in ('g', 'r', 'i'):
            bandFrame = result[band]
            self.assertIsInstance(bandFrame, pd.DataFrame)
            self.assertIn('Fwhm', bandFrame.columns)

140 

141 

if __name__ == "__main__":
    # Direct script execution: make the same lsst.utils.tests.init() call
    # that setup_module makes, then hand control to unittest's runner.
    lsst.utils.tests.init()
    unittest.main()