Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import copy 

23import functools 

24import numpy as np 

25import os 

26import pandas as pd 

27import unittest 

28 

29import lsst.utils.tests 

30 

# TODO: Remove skipUnless and this try block DM-22256
# The parquet table and functor classes are imported defensively; the outcome
# is recorded in ``havePyArrow`` rather than letting an ImportError propagate.
try:
    from lsst.pipe.tasks.parquetTable import MultilevelParquetTable
    from lsst.pipe.tasks.functors import (CompositeFunctor, CustomFunctor, Column, RAColumn,
                                          DecColumn, Mag, MagDiff, Color, StarGalaxyLabeller,
                                          DeconvolvedMoments, SdssTraceSize, PsfSdssTraceSizeDiff,
                                          HsmTraceSize, PsfHsmTraceSizeDiff, HsmFwhm)
except ImportError:
    havePyArrow = False
else:
    havePyArrow = True

# Absolute path of the directory containing this file.
ROOT = os.path.abspath(os.path.dirname(__file__))

43 

44 

@unittest.skipUnless(havePyArrow, "Requires pyarrow")
class FunctorTestCase(unittest.TestCase):
    """Run the functors imported from ``lsst.pipe.tasks.functors`` against a
    small in-memory ``MultilevelParquetTable`` built by `simulateMultiParquet`.
    """

    def simulateMultiParquet(self, dataDict):
        """Create a simple test MultilevelParquetTable.

        One copy of the input columns is made for every combination of
        ``self.datasets`` and ``self.filters``; the copies are joined into a
        single DataFrame whose columns are a three-level
        ``(dataset, filter, column)`` MultiIndex.

        Parameters
        ----------
        dataDict : `dict`
            Mapping of column name to column values, passed to
            `pandas.DataFrame`.

        Returns
        -------
        parq : `MultilevelParquetTable`
            Table wrapping the joined DataFrame.
        """
        simpleDF = pd.DataFrame(dataDict)
        dfFilterDSCombos = []
        for ds in self.datasets:
            for filterName in self.filters:
                df = copy.copy(simpleDF)
                # reindex returns a new object, so the result must be kept
                # for the column sort to have any effect.
                df = df.reindex(sorted(df.columns), axis=1)
                df['dataset'] = ds
                df['filter'] = filterName
                df.columns = pd.MultiIndex.from_tuples(
                    [(ds, filterName, c) for c in df.columns],
                    names=('dataset', 'filter', 'column'))
                dfFilterDSCombos.append(df)

        df = functools.reduce(lambda d1, d2: d1.join(d2), dfFilterDSCombos)

        return MultilevelParquetTable(dataFrame=df)

    def setUp(self):
        """Seed the random generator and define the base coordinate data."""
        np.random.seed(1234)
        self.datasets = ['forced_src', 'meas', 'ref']
        self.filters = ['HSC-G', 'HSC-R']
        self.columns = ['coord_ra', 'coord_dec']
        self.nRecords = 5
        self.dataDict = {
            "coord_ra": [3.77654137, 3.77643059, 3.77621148, 3.77611944, 3.77610396],
            "coord_dec": [0.01127624, 0.01127787, 0.01127543, 0.01127543, 0.01127543]}

    def _funcVal(self, functor, parq):
        """Evaluate a functor on a table and run basic sanity checks.

        Checks that ``name`` and ``shortname`` are strings, that calling the
        functor returns a `pandas.Series`, and that calling it with
        ``dropna=True`` yields no nulls.

        Parameters
        ----------
        functor : functor instance
            Object under test; called as ``functor(parq)``.
        parq : `MultilevelParquetTable`
            Table to evaluate the functor on.

        Returns
        -------
        val : `pandas.Series`
            Result of ``functor(parq, dropna=True)``.
        """
        self.assertIsInstance(functor.name, str)
        self.assertIsInstance(functor.shortname, str)

        val = functor(parq)
        self.assertIsInstance(val, pd.Series)

        val = functor(parq, dropna=True)
        self.assertEqual(val.isnull().sum(), 0)

        return val

    def testColumn(self):
        """Check that a plain `Column` functor evaluates cleanly."""
        self.columns.append("base_FootprintArea_value")
        self.dataDict["base_FootprintArea_value"] = \
            np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        func = Column('base_FootprintArea_value', filt='HSC-G')
        self._funcVal(func, parq)

    def testCustom(self):
        """Check that `CustomFunctor` of ``2*column`` equals twice `Column`."""
        self.columns.append("base_FootprintArea_value")
        self.dataDict["base_FootprintArea_value"] = \
            np.random.rand(self.nRecords)
        parq = self.simulateMultiParquet(self.dataDict)
        func = CustomFunctor('2*base_FootprintArea_value', filt='HSC-G')
        val = self._funcVal(func, parq)

        func2 = Column('base_FootprintArea_value', filt='HSC-G')

        # The comparison result has to be asserted; a bare np.allclose call
        # would let any mismatch pass silently.
        self.assertTrue(np.allclose(val.values, 2*func2(parq).values, atol=1e-13, rtol=0))

    def testCoords(self):
        """Check `RAColumn`/`DecColumn` against the table coordinates scaled
        by ``180/pi``.
        """
        parq = self.simulateMultiParquet(self.dataDict)
        ra = self._funcVal(RAColumn(), parq)
        dec = self._funcVal(DecColumn(), parq)

        columnDict = {'dataset': 'ref', 'filter': 'HSC-G',
                      'column': ['coord_ra', 'coord_dec']}
        coords = parq.toDataFrame(columns=columnDict, droplevels=True) / np.pi * 180.

        self.assertTrue(np.allclose(ra, coords[('ref', 'HSC-G', 'coord_ra')], atol=1e-13, rtol=0))
        self.assertTrue(np.allclose(dec, coords[('ref', 'HSC-G', 'coord_dec')], atol=1e-13, rtol=0))

    def testMag(self):
        """Check `Mag` and `Color` with and without an explicit dataset."""
        self.columns.extend(["base_PsfFlux_instFlux", "base_PsfFlux_instFluxErr"])
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)
        # Change one dataset filter combinations value.
        parq._df[("meas", "HSC-G", "base_PsfFlux_instFlux")] -= 1

        fluxName = 'base_PsfFlux'

        # Check that things work when you provide dataset explicitly
        for dataset in ['forced_src', 'meas']:
            psfMag_G = self._funcVal(Mag(fluxName, dataset=dataset,
                                         filt='HSC-G'),
                                     parq)
            psfMag_R = self._funcVal(Mag(fluxName, dataset=dataset,
                                         filt='HSC-R'),
                                     parq)

            psfColor_GR = self._funcVal(Color(fluxName, 'HSC-G', 'HSC-R',
                                              dataset=dataset),
                                        parq)

            self.assertTrue(np.allclose((psfMag_G - psfMag_R).dropna(), psfColor_GR, rtol=0, atol=1e-13))

        # Check that behavior as expected when dataset not provided;
        # that is, that the color comes from forced and default Mag is meas
        psfMag_G = self._funcVal(Mag(fluxName, filt='HSC-G'), parq)
        psfMag_R = self._funcVal(Mag(fluxName, filt='HSC-R'), parq)

        psfColor_GR = self._funcVal(Color(fluxName, 'HSC-G', 'HSC-R'), parq)

        # These should *not* be equal.
        self.assertFalse(np.allclose((psfMag_G - psfMag_R).dropna(), psfColor_GR))

    def testMagDiff(self):
        """Check that `MagDiff` equals the difference of the two `Mag` values
        in every filter.
        """
        self.columns.extend(["base_PsfFlux_instFlux", "base_PsfFlux_instFluxErr",
                             "modelfit_CModel_instFlux", "modelfit_CModel_instFluxErr"])
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        self.dataDict["modelfit_CModel_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["modelfit_CModel_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)

        # Use the loop variable as-is so that each filter is really tested.
        for filt in self.filters:
            val = self._funcVal(MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt), parq)

            mag1 = self._funcVal(Mag('modelfit_CModel', filt=filt), parq)
            mag2 = self._funcVal(Mag('base_PsfFlux', filt=filt), parq)
            self.assertTrue(np.allclose((mag2 - mag1).dropna(), val, rtol=0, atol=1e-13))

    def testLabeller(self):
        """Run `StarGalaxyLabeller` through the generic functor checks."""
        # Covering the code is better than nothing
        self.columns.append("base_ClassificationExtendedness_value")
        self.dataDict["base_ClassificationExtendedness_value"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        self._funcVal(StarGalaxyLabeller(), parq)

    def testOther(self):
        """Run the moment and trace-size functors through the generic checks
        in every filter.
        """
        self.columns.extend(["ext_shapeHSM_HsmSourceMoments_xx", "ext_shapeHSM_HsmSourceMoments_yy",
                             "base_SdssShape_xx", "base_SdssShape_yy",
                             "ext_shapeHSM_HsmPsfMoments_xx", "ext_shapeHSM_HsmPsfMoments_yy",
                             "base_SdssShape_psf_xx", "base_SdssShape_psf_yy"])
        self.dataDict["ext_shapeHSM_HsmSourceMoments_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmSourceMoments_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmPsfMoments_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmPsfMoments_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_psf_xx"] = np.full(self.nRecords, 1)
        self.dataDict["base_SdssShape_psf_yy"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        # Covering the code is better than nothing
        for filt in self.filters:
            for Func in [DeconvolvedMoments,
                         SdssTraceSize,
                         PsfSdssTraceSizeDiff,
                         HsmTraceSize, PsfHsmTraceSizeDiff, HsmFwhm]:
                self._funcVal(Func(filt=filt), parq)

    def _compositeFuncVal(self, functor, parq):
        """Evaluate a `CompositeFunctor` on a table and run basic checks.

        Checks that the functor is a `CompositeFunctor`, that calling it
        returns a `pandas.DataFrame` with a column for every key of
        ``functor.funcDict``, and that calling it with ``dropna=True`` yields
        no nulls.

        Parameters
        ----------
        functor : `CompositeFunctor`
            Composite functor under test.
        parq : `MultilevelParquetTable`
            Table to evaluate the functor on.

        Returns
        -------
        df : `pandas.DataFrame`
            Result of ``functor(parq, dropna=True)``.
        """
        self.assertIsInstance(functor, CompositeFunctor)

        df = functor(parq)

        self.assertIsInstance(df, pd.DataFrame)
        # assertIn names the missing key on failure.
        for key in functor.funcDict:
            self.assertIn(key, df.columns)

        df = functor(parq, dropna=True)

        # Check that there are no nulls
        self.assertFalse(df.isnull().any(axis=None))

        return df

    def testComposite(self):
        """Check `CompositeFunctor` built from a dict (per-functor and global
        filter) and from a list.
        """
        self.columns.extend(["modelfit_CModel_instFlux", "base_PsfFlux_instFlux"])
        self.dataDict["modelfit_CModel_instFlux"] = np.full(self.nRecords, 1)
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        # Modify r band value slightly.
        parq._df[("meas", "HSC-R", "base_PsfFlux_instFlux")] -= 0.1

        filt = 'HSC-G'
        funcDict = {'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
                    'ra': RAColumn(),
                    'dec': DecColumn(),
                    'psfMag': Mag('base_PsfFlux', filt=filt),
                    'cmodel_magDiff': MagDiff('base_PsfFlux',
                                              'modelfit_CModel', filt=filt)}
        func = CompositeFunctor(funcDict)
        df = self._compositeFuncVal(func, parq)

        # Repeat same, but define filter globally instead of individually
        funcDict2 = {'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
                     'ra': RAColumn(),
                     'dec': DecColumn(),
                     'psfMag': Mag('base_PsfFlux'),
                     'cmodel_magDiff': MagDiff('base_PsfFlux',
                                               'modelfit_CModel')}

        func2 = CompositeFunctor(funcDict2, filt=filt)
        df2 = self._compositeFuncVal(func2, parq)
        self.assertTrue(df.equals(df2))

        func2.filt = 'HSC-R'
        df3 = self._compositeFuncVal(func2, parq)
        # Because we modified the R filter this should fail.
        self.assertFalse(df2.equals(df3))

        # Make sure things work with passing list instead of dict
        funcs = [Mag('base_PsfFlux', dataset='ref'),
                 RAColumn(),
                 DecColumn(),
                 Mag('base_PsfFlux', filt=filt),
                 MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt)]

        self._compositeFuncVal(CompositeFunctor(funcs), parq)

    def testCompositeColor(self):
        """Run a `CompositeFunctor` mixing `Mag` and `Color` through the
        generic composite checks.
        """
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)
        funcDict = {'a': Mag('base_PsfFlux', dataset='meas', filt='HSC-G'),
                    'b': Mag('base_PsfFlux', dataset='forced_src', filt='HSC-G'),
                    'c': Color('base_PsfFlux', 'HSC-G', 'HSC-R')}
        # Covering the code is better than nothing
        self._compositeFuncVal(CompositeFunctor(funcDict), parq)

271 

272 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Memory test case inherited unchanged from ``lsst.utils.tests``."""

275 

276 

def setup_module(module):
    """Initialise the ``lsst.utils.tests`` helpers for this module.

    Parameters
    ----------
    module
        Not used by this function; presumably the module object handed in by
        the test runner's module-level setup hook (confirm against the
        runner in use).
    """
    lsst.utils.tests.init()

279 

280 

# Script entry point: initialise the LSST test helpers, then let unittest
# discover and run the test cases defined in this file.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()