# This file is part of pipe_tasks.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import copy
import functools
import numpy as np
import os
import pandas as pd
import unittest

import lsst.utils.tests

# TODO: Remove skipUnless and this try block DM-22256
try:
    from lsst.pipe.tasks.parquetTable import MultilevelParquetTable
    from lsst.pipe.tasks.functors import (CompositeFunctor, CustomFunctor, Column, RAColumn,
                                          DecColumn, Mag, MagDiff, Color, StarGalaxyLabeller,
                                          DeconvolvedMoments, SdssTraceSize, PsfSdssTraceSizeDiff,
                                          HsmTraceSize, PsfHsmTraceSizeDiff, HsmFwhm)
    havePyArrow = True
except ImportError:
    havePyArrow = False

ROOT = os.path.abspath(os.path.dirname(__file__))


@unittest.skipUnless(havePyArrow, "Requires pyarrow")
class FunctorTestCase(unittest.TestCase):
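    """Unit tests for the functor classes in lsst.pipe.tasks.functors.

    Each test builds a small in-memory MultilevelParquetTable via
    simulateMultiParquet and checks that the functors return sensible
    pandas objects when applied to it.
    """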

    def simulateMultiParquet(self, dataDict):
        """Create a simple test MultilevelParquetTable.
        """
        simpleDF = pd.DataFrame(dataDict)
        dfFilterDSCombos = []
        for ds in self.datasets:
            for filterName in self.filters:
                df = copy.copy(simpleDF)
                # reindex returns a new DataFrame, so keep the sorted-column result.
                df = df.reindex(sorted(df.columns), axis=1)
                df['dataset'] = ds
                df['filter'] = filterName
                df.columns = pd.MultiIndex.from_tuples(
                    [(ds, filterName, c) for c in df.columns],
                    names=('dataset', 'filter', 'column'))
                dfFilterDSCombos.append(df)

        df = functools.reduce(lambda d1, d2: d1.join(d2), dfFilterDSCombos)
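        # The joined frame has a three-level column MultiIndex, e.g.
        # ('meas', 'HSC-G', 'coord_ra'): one (dataset, filter) copy of every
        # input column plus the added 'dataset' and 'filter' columns.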

        return MultilevelParquetTable(dataFrame=df)

    def setUp(self):
        np.random.seed(1234)
        self.datasets = ['forced_src', 'meas', 'ref']
        self.filters = ['HSC-G', 'HSC-R']
        self.columns = ['coord_ra', 'coord_dec']
        self.nRecords = 5
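        # coord_ra/coord_dec are given in radians (testCoords converts them
        # to degrees when comparing against these raw values).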
        self.dataDict = {
            "coord_ra": [3.77654137, 3.77643059, 3.77621148, 3.77611944, 3.77610396],
            "coord_dec": [0.01127624, 0.01127787, 0.01127543, 0.01127543, 0.01127543]}

    def _funcVal(self, functor, parq):
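        """Apply a scalar functor to ``parq`` and run common sanity checks.

        Verifies that the functor exposes string ``name``/``shortname``
        attributes, returns a pandas Series, and drops nulls when called
        with ``dropna=True``; returns the computed Series.
        """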
        self.assertIsInstance(functor.name, str)
        self.assertIsInstance(functor.shortname, str)

        val = functor(parq)
        self.assertIsInstance(val, pd.Series)

        val = functor(parq, dropna=True)
        self.assertEqual(val.isnull().sum(), 0)

        return val

    def testColumn(self):
        self.columns.append("base_FootprintArea_value")
        self.dataDict["base_FootprintArea_value"] = \
            np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        func = Column('base_FootprintArea_value', filt='HSC-G')
        self._funcVal(func, parq)

    def testCustom(self):
        self.columns.append("base_FootprintArea_value")
        self.dataDict["base_FootprintArea_value"] = \
            np.random.rand(self.nRecords)
        parq = self.simulateMultiParquet(self.dataDict)
        func = CustomFunctor('2*base_FootprintArea_value', filt='HSC-G')
        val = self._funcVal(func, parq)

        func2 = Column('base_FootprintArea_value', filt='HSC-G')

        # The custom expression should evaluate to exactly twice the raw column;
        # assert the comparison so the test actually checks it.
        self.assertTrue(np.allclose(val.values, 2*func2(parq).values, atol=1e-13, rtol=0))

    def testCoords(self):
        parq = self.simulateMultiParquet(self.dataDict)
        ra = self._funcVal(RAColumn(), parq)
        dec = self._funcVal(DecColumn(), parq)

        columnDict = {'dataset': 'ref', 'filter': 'HSC-G',
                      'column': ['coord_ra', 'coord_dec']}
        # Keep the full (dataset, filter, column) index so the tuple lookups below resolve.
        coords = parq.toDataFrame(columns=columnDict, droplevels=False) / np.pi * 180.

        self.assertTrue(np.allclose(ra, coords[('ref', 'HSC-G', 'coord_ra')], atol=1e-13, rtol=0))
        self.assertTrue(np.allclose(dec, coords[('ref', 'HSC-G', 'coord_dec')], atol=1e-13, rtol=0))

    def testMag(self):
        self.columns.extend(["base_PsfFlux_instFlux", "base_PsfFlux_instFluxErr"])
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)
        # Change one dataset/filter combination's value.
        parq._df[("meas", "HSC-G", "base_PsfFlux_instFlux")] -= 1
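        # With only the 'meas' HSC-G flux perturbed, meas-based magnitudes no
        # longer match forced_src-based ones; the default-dataset check at the
        # end of this test relies on that difference.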

        fluxName = 'base_PsfFlux'

        # Check that things work when you provide dataset explicitly
        for dataset in ['forced_src', 'meas']:
            psfMag_G = self._funcVal(Mag(fluxName, dataset=dataset,
                                         filt='HSC-G'),
                                     parq)
            psfMag_R = self._funcVal(Mag(fluxName, dataset=dataset,
                                         filt='HSC-R'),
                                     parq)

            psfColor_GR = self._funcVal(Color(fluxName, 'HSC-G', 'HSC-R',
                                              dataset=dataset),
                                        parq)

            self.assertTrue(np.allclose((psfMag_G - psfMag_R).dropna(), psfColor_GR, rtol=0, atol=1e-13))

        # Check that the behavior is as expected when no dataset is provided;
        # that is, the color comes from forced_src while the default Mag dataset is meas.
        psfMag_G = self._funcVal(Mag(fluxName, filt='HSC-G'), parq)
        psfMag_R = self._funcVal(Mag(fluxName, filt='HSC-R'), parq)

        psfColor_GR = self._funcVal(Color(fluxName, 'HSC-G', 'HSC-R'), parq)

        # These should *not* be equal, because the meas HSC-G flux was modified above.
        self.assertFalse(np.allclose((psfMag_G - psfMag_R).dropna(), psfColor_GR))

    def testMagDiff(self):
        self.columns.extend(["base_PsfFlux_instFlux", "base_PsfFlux_instFluxErr",
                             "modelfit_CModel_instFlux", "modelfit_CModel_instFluxErr"])
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        self.dataDict["modelfit_CModel_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["modelfit_CModel_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)

        for filt in self.filters:
            val = self._funcVal(MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt), parq)

            mag1 = self._funcVal(Mag('modelfit_CModel', filt=filt), parq)
            mag2 = self._funcVal(Mag('base_PsfFlux', filt=filt), parq)
            self.assertTrue(np.allclose((mag2 - mag1).dropna(), val, rtol=0, atol=1e-13))

    def testLabeller(self):
        # Covering the code is better than nothing
        self.columns.append("base_ClassificationExtendedness_value")
        self.dataDict["base_ClassificationExtendedness_value"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
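        # StarGalaxyLabeller presumably maps the extendedness value to a
        # star/galaxy label, so with extendedness = 1 everywhere each row
        # should come back labelled as extended.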
        labels = self._funcVal(StarGalaxyLabeller(), parq)  # noqa

    def testOther(self):
        self.columns.extend(["ext_shapeHSM_HsmSourceMoments_xx", "ext_shapeHSM_HsmSourceMoments_yy",
                             "base_SdssShape_xx", "base_SdssShape_yy",
                             "ext_shapeHSM_HsmPsfMoments_xx", "ext_shapeHSM_HsmPsfMoments_yy",
                             "base_SdssShape_psf_xx", "base_SdssShape_psf_yy"])
        self.dataDict["ext_shapeHSM_HsmSourceMoments_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmSourceMoments_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmPsfMoments_xx"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["ext_shapeHSM_HsmPsfMoments_yy"] = np.full(self.nRecords, 1 / np.sqrt(2))
        self.dataDict["base_SdssShape_psf_xx"] = np.full(self.nRecords, 1)
        self.dataDict["base_SdssShape_psf_yy"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        # Covering the code is better than nothing
        for filt in self.filters:
            for Func in [DeconvolvedMoments,
                         SdssTraceSize,
                         PsfSdssTraceSizeDiff,
                         HsmTraceSize, PsfHsmTraceSizeDiff, HsmFwhm]:
                val = self._funcVal(Func(filt=filt), parq)  # noqa

    def _compositeFuncVal(self, functor, parq):
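        """Apply a CompositeFunctor to ``parq`` and run common sanity checks.

        Verifies that the result is a DataFrame containing a column for every
        entry of ``functor.funcDict`` and that ``dropna=True`` leaves no
        nulls; returns the computed DataFrame.
        """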
        self.assertIsInstance(functor, CompositeFunctor)

        df = functor(parq)

        self.assertIsInstance(df, pd.DataFrame)
        self.assertTrue(np.all([k in df.columns for k in functor.funcDict.keys()]))

        df = functor(parq, dropna=True)

        # Check that there are no nulls
        self.assertFalse(df.isnull().any(axis=None))

        return df

    def testComposite(self):
        self.columns.extend(["modelfit_CModel_instFlux", "base_PsfFlux_instFlux"])
        self.dataDict["modelfit_CModel_instFlux"] = np.full(self.nRecords, 1)
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1)
        parq = self.simulateMultiParquet(self.dataDict)
        # Modify the HSC-R band value slightly.
        parq._df[("meas", "HSC-R", "base_PsfFlux_instFlux")] -= 0.1

        filt = 'HSC-G'
        funcDict = {'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
                    'ra': RAColumn(),
                    'dec': DecColumn(),
                    'psfMag': Mag('base_PsfFlux', filt=filt),
                    'cmodel_magDiff': MagDiff('base_PsfFlux',
                                              'modelfit_CModel', filt=filt)}
        func = CompositeFunctor(funcDict)
        df = self._compositeFuncVal(func, parq)

        # Repeat the same check, but define the filter globally instead of per functor
        funcDict2 = {'psfMag_ref': Mag('base_PsfFlux', dataset='ref'),
                     'ra': RAColumn(),
                     'dec': DecColumn(),
                     'psfMag': Mag('base_PsfFlux'),
                     'cmodel_magDiff': MagDiff('base_PsfFlux',
                                               'modelfit_CModel')}

        func2 = CompositeFunctor(funcDict2, filt=filt)
        df2 = self._compositeFuncVal(func2, parq)
        self.assertTrue(df.equals(df2))

        func2.filt = 'HSC-R'
        df3 = self._compositeFuncVal(func2, parq)
        # Because we modified the HSC-R flux value above, this result should differ.
        self.assertFalse(df2.equals(df3))

        # Make sure things work when passing a list instead of a dict
        funcs = [Mag('base_PsfFlux', dataset='ref'),
                 RAColumn(),
                 DecColumn(),
                 Mag('base_PsfFlux', filt=filt),
                 MagDiff('base_PsfFlux', 'modelfit_CModel', filt=filt)]

        df = self._compositeFuncVal(CompositeFunctor(funcs), parq)

    def testCompositeColor(self):
        self.dataDict["base_PsfFlux_instFlux"] = np.full(self.nRecords, 1000)
        self.dataDict["base_PsfFlux_instFluxErr"] = np.full(self.nRecords, 10)
        parq = self.simulateMultiParquet(self.dataDict)
        funcDict = {'a': Mag('base_PsfFlux', dataset='meas', filt='HSC-G'),
                    'b': Mag('base_PsfFlux', dataset='forced_src', filt='HSC-G'),
                    'c': Color('base_PsfFlux', 'HSC-G', 'HSC-R')}
        # Covering the code is better than nothing
        df = self._compositeFuncVal(CompositeFunctor(funcDict), parq)  # noqa


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    pass


def setup_module(module):
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()