Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 45%

153 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-03 10:03 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "LoadVector", 

25 "DownselectVector", 

26 "MultiCriteriaDownselectVector", 

27 "ConvertFluxToMag", 

28 "ConvertUnits", 

29 "CalcSn", 

30 "MagDiff", 

31 "ExtinctionCorrectedMagDiff", 

32 "PerGroupStatistic", 

33 "ResidualWithPerGroupStatistic", 

34 "RAcosDec", 

35) 

36 

37import logging 

38import warnings 

39from typing import Optional, cast 

40 

41import numpy as np 

42import pandas as pd 

43from astropy import units as u 

44from lsst.pex.config import DictField, Field 

45from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField 

46 

47from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction 

48from .selectors import VectorSelector 

49 

50_LOG = logging.getLogger(__name__) 

51 

52# Basic vectorActions 

53 

54 

class LoadVector(VectorAction):
    """Fetch a single vector out of KeyedData and return it as a numpy array.

    The configured key may contain ``str.format`` placeholders; they are
    filled in from the keyword arguments supplied when the action is called.
    """

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one-entry schema naming the (unformatted) vector key."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Look up the formatted key in ``data`` and wrap it in ``np.array``."""
        resolvedKey = self.vectorKey.format(**kwargs)
        loaded = cast(Vector, data[resolvedKey])
        return np.array(loaded)

65 

66 

class DownselectVector(VectorAction):
    """Load a vector from KeyedData and keep only the entries picked out by
    the configured selector action.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField[VectorAction](
        doc="Action which returns a selection mask", default=VectorSelector
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the vector key first, then everything the selector needs."""
        yield (self.vectorKey, Vector)
        selectorSchema = cast(VectorAction, self.selector).getInputSchema()
        yield from selectorSchema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate the selector on ``data`` and index the vector with it."""
        # The selector runs before the vector lookup, as in the original
        # ordering, so a failing selector surfaces first.
        selection = cast(VectorAction, self.selector)(data, **kwargs)
        resolvedKey = self.vectorKey.format(**kwargs)
        fullVector = cast(Vector, data[resolvedKey])
        return fullVector[selection]

85 

86 

class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.

    Notes
    -----
    The masks returned by the individual selectors are combined by
    element-wise multiplication, which acts as a logical AND for boolean
    masks. If no selectors are configured, the vector is returned without
    any selection applied.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the vector key followed by every selector's input schema."""
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Combine all selector masks and apply the result to the vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector and whatever the selectors read.
        **kwargs
            Forwarded to each selector and used to format ``vectorKey``.

        Returns
        -------
        result : `Vector`
            The entries of the vector for which every selector's mask is set,
            or the whole vector when there are no selectors.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            if mask is None:
                mask = subMask
            else:
                # Build a new array rather than using ``*=``: the first mask
                # is whatever the selector returned, and modifying it in place
                # could alter an array owned by the selector or by ``data``.
                mask = mask * subMask  # type: ignore

        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors configured. Indexing with ``None`` would add an
            # axis on a numpy array instead of selecting, so return as is.
            return vector
        return vector[mask]

112 

113 

114# Astronomical vectorActions 

115 

116 

class CalcSn(VectorAction):
    """Compute a signal-to-noise vector as flux divided by its uncertainty.

    The uncertainty column name is the flux column name with
    ``uncertaintySuffix`` appended.
    """

    fluxType = Field[str](doc="Flux type (vector key) to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the flux key and then the matching uncertainty key."""
        yield self.fluxType, Vector
        yield self.fluxType + self.uncertaintySuffix, Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return ``flux / uncertainty`` element-wise as a numpy array."""
        fluxKey = self.fluxType.format(**kwargs)
        # The suffix is appended before formatting, so placeholders in either
        # part are resolved together.
        errKey = (self.fluxType + self.uncertaintySuffix).format(**kwargs)

        flux = np.array(data[fluxKey])
        fluxErr = np.array(data[errKey])
        return np.array(flux / fluxErr)

135 

136 

class ConvertFluxToMag(VectorAction):
    """Convert a flux vector into AB magnitudes.

    The flux unit is configurable through ``fluxUnit`` (nJy by default) and
    the result can optionally be scaled to millimags.
    """

    vectorKey = Field[str](doc="Key of flux vector to convert to mags")
    fluxUnit = Field[str](doc="Astropy unit of flux vector", default="nJy")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one-entry schema naming the flux vector key."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the magnitudes corresponding to the configured flux column."""
        fluxKey = self.vectorKey.format(**kwargs)
        with warnings.catch_warnings():
            # Silence the numeric warnings named by these two patterns for
            # the duration of the conversion.
            for pattern in (r"invalid value encountered", r"divide by zero"):
                warnings.filterwarnings("ignore", pattern)

            fluxes = np.array(cast(Vector, data[fluxKey])) * u.Unit(self.fluxUnit)
            mags = fluxes.to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags

156 

157 

class ConvertUnits(VectorAction):
    """Build a vector with another action and convert it between two
    Astropy units.
    """

    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    inUnit = Field[str](doc="input Astropy unit")
    outUnit = Field[str](doc="output Astropy unit")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's input schema, materialised as a tuple."""
        builderSchema = self.buildAction.getInputSchema()
        return tuple(builderSchema)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the built vector's values expressed in ``outUnit``."""
        rawValues = self.buildAction(data, **kwargs)
        # Attach the input unit so astropy can perform the conversion.
        quantity = rawValues * u.Unit(self.inUnit)
        converted = quantity.to(self.outUnit)
        return converted.value

171 

172 

class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes, each of which is
    derived from a flux column.

    Returns
    -------
    The magnitude difference ``mag(col1) - mag(col2)``, in millimags when
    ``returnMillimags`` is set (the default) and in mags otherwise.

    Notes
    -----
    The flux columns need to be in units (specifiable in the fluxUnits1 and
    fluxUnits2 config options) that can be converted to AB magnitudes. This
    action doesn't have any calibration information and assumes that the
    fluxes are already calibrated.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema naming both flux columns."""
        schema = ((self.col1, Vector), (self.col2, Vector))
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Convert both flux columns to AB magnitudes and subtract them."""
        columnsAndUnits = (
            (self.col1, self.fluxUnits1),
            (self.col2, self.fluxUnits2),
        )
        # Same recipe for both columns: format the key, attach the configured
        # unit, convert to AB magnitudes.
        firstMag, secondMag = (
            (np.array(data[column.format(**kwargs)]) * u.Unit(unit)).to(u.ABmag)
            for column, unit in columnsAndUnits
        )

        difference = firstMag - secondMag
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        return np.array(difference.value)

213 

214 

class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for
    extinction.

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.

    Notes
    -----
    The correction subtracted from the magnitude difference is
    ``(coeff[band1] - coeff[band2]) * E(B-V)``, converted to millimags when
    ``magDiff.returnMillimags`` is set. If no coefficients are configured, or
    either band is missing from them, the uncorrected difference is returned.
    """

    magDiff = ConfigurableActionField[VectorAction](
        doc="Action that returns a difference in magnitudes", default=MagDiff
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the magDiff action's schema plus the E(B-V) column."""
        # Materialise the nested schema first: actions may implement
        # getInputSchema as a generator, which cannot be concatenated with
        # ``+``.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the magnitude difference with the extinction term removed.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux columns read by ``magDiff`` and the
            E(B-V) column.
        **kwargs
            Forwarded to the ``magDiff`` action.

        Returns
        -------
        result : `Vector`
            The corrected difference, or the plain ``magDiff`` output when
            the correction cannot be applied.
        """
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.debug("No extinction Coefficients. Not applying extinction correction")
            return diff

        # NOTE(review): the band is taken from the unformatted column name,
        # so a templated column such as "{band}_psfFlux" yields "{band}"
        # unless band1/band2 are set explicitly -- confirm this is intended.
        col1Band = self.band1 if self.band1 else self.magDiff.col1.split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Match the units of the difference produced by the magDiff action.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)

279 

280 

class RAcosDec(VectorAction):
    """Construct a vector of RA*cos(Dec) in order to have commensurate values
    between RA and Dec.

    Notes
    -----
    Dec is interpreted as degrees when taking the cosine. RA is only scaled,
    so the result is in whatever units the RA column uses.
    """

    raKey = Field[str](doc="RA coordinate", default="coord_ra")
    decKey = Field[str](doc="Dec coordinate", default="coord_dec")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema naming the Dec and RA columns."""
        return ((self.decKey, Vector), (self.raKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return RA multiplied element-wise by cos(Dec).

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the RA and Dec columns.
        **kwargs
            Unused; accepted for interface compatibility.

        Returns
        -------
        result : `Vector`
            ``ra * cos(dec)`` as a numpy array.
        """
        # Coerce with np.array, as the other actions in this module do, so
        # both pandas columns and plain numpy arrays are accepted. The
        # previous ``.to_numpy()`` call only worked on pandas objects.
        ra = np.array(data[self.raKey])
        dec = np.array(data[self.decKey])
        decRadians = (dec * u.degree).to(u.radian).value
        return ra * np.cos(decRadians)

295 

296 

297# Statistical vectorActions 

298 

299 

class PerGroupStatistic(VectorAction):
    """Reduce a vector to one value per group.

    Elements are grouped by the ``groupKey`` column and the named function is
    applied to each group. Any function name accepted as a string by pandas
    ``aggregate`` on a groupby can be used.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField[VectorAction](doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's schema followed by the group key."""
        builderSchema = tuple(self.buildAction.getInputSchema())
        groupSchema = ((self.groupKey, Vector),)
        return builderSchema + groupSchema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the per-group statistic as an array, one entry per group."""
        groupIds = data[self.groupKey]
        values = self.buildAction(data, **kwargs)
        table = pd.DataFrame({"groupKey": groupIds, "value": values})

        perGroup = table.groupby("groupKey")["value"].aggregate(self.func)
        return np.array(perGroup)

317 

318 

class ResidualWithPerGroupStatistic(VectorAction):
    """Subtract from every element the statistic of the group it belongs to.

    Groups are formed from the ``groupKey`` column; the statistic defaults to
    the per-group mean.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group", default="mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's schema followed by the group key."""
        builderSchema = tuple(self.buildAction.getInputSchema())
        groupSchema = ((self.groupKey, Vector),)
        return builderSchema + groupSchema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return ``value - statistic(group of value)`` for every element."""
        values = self.buildAction(data, **kwargs)
        table = pd.DataFrame({"groupKey": data[self.groupKey], "value": values})
        perGroup = table.groupby("groupKey")["value"].aggregate(self.func)

        # Attach each row's group statistic. Both sides carry a "value"
        # column, so the suffixes tell the per-row and per-group values apart.
        withGroupStat = table.join(
            perGroup,
            on="groupKey",
            validate="m:1",
            lsuffix="_individual",
            rsuffix="_group",
        )

        residual = withGroupStat["value_individual"] - withGroupStat["value_group"]
        return np.array(residual)