Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 49%

157 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-06 10:00 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import logging 

24from typing import Optional, cast 

25 

26import numpy as np 

27import pandas as pd 

28from astropy import units as u 

29from lsst.pex.config import DictField, Field 

30from lsst.pipe.tasks.configurableActions import ConfigurableActionField, ConfigurableActionStructField 

31 

32from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction 

33from .selectors import VectorSelector 

34 

35_LOG = logging.getLogger(__name__) 

36 

37 

class DownselectVector(VectorAction):
    """Load one vector from KeyedData and keep only the entries picked out by
    a selector action.

    The selector is evaluated on the same ``data`` and keyword arguments, and
    its result is used to index the loaded vector.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField(doc="Action which returns a selection mask", default=VectorSelector)

    def getInputSchema(self) -> KeyedDataSchema:
        # The vector to be trimmed comes first, then everything the selector
        # itself declares as input.
        yield (self.vectorKey, Vector)
        selectorAction = cast(VectorAction, self.selector)
        yield from selectorAction.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        selectorAction = cast(VectorAction, self.selector)
        selection = selectorAction(data, **kwargs)
        # vectorKey is treated as a format template filled in from kwargs.
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return column[selection]

54 

55 

class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.

    Notes
    -----
    The masks returned by the individual selectors are combined into a new
    array; none of the selector outputs is modified in place. If no selectors
    are configured the vector is returned unfiltered.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # The vector to be trimmed, followed by the inputs of every selector.
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from cast(VectorAction, action).getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the entries of the configured vector passing all selectors.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector named by ``vectorKey`` and whatever
            the selectors read.
        **kwargs
            Forwarded to every selector and used to fill the ``vectorKey``
            format template.

        Returns
        -------
        result : `Vector`
            The selected subset of the vector.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            if mask is None:
                mask = subMask
            else:
                # Build a fresh array rather than using ``*=``: the first
                # mask is the selector's own return value and must not be
                # mutated behind its back.
                mask = np.logical_and(mask, subMask)

        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors configured: indexing with None would add an axis
            # instead of selecting, so hand back the full vector.
            return vector
        return vector[mask]

81 

82 

class MagColumnNanoJansky(VectorAction):
    """Convert a flux column to magnitudes via
    ``-2.5 * log10(flux * 1e-9 / 3631)``.

    The scaling treats the input as nanojansky (as the class name indicates)
    and 3631 Jy as the zero point. Entries for which the logarithm is
    undefined come out as NaN or infinity; the corresponding numpy warnings
    are suppressed.
    """

    vectorKey = Field[str](doc="column key to use for this transformation")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute magnitudes for the configured flux column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux vector named by ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template.

        Returns
        -------
        mag : `Vector`
            Magnitudes, scaled to millimagnitudes when ``returnMillimags``
            is set.
        """
        # Use the standard-library module directly: ``np.warnings`` was only
        # an accidental alias of it and no longer exists as of NumPy 1.24.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", r"invalid value encountered")
            warnings.filterwarnings("ignore", r"divide by zero")
            vec = cast(Vector, data[self.vectorKey.format(**kwargs)])
            mag = np.array(-2.5 * np.log10((vec * 1e-9) / 3631.0))  # type: ignore
            if self.returnMillimags:
                # u.mag.to(u.mmag) is the plain mag -> mmag scale factor.
                return mag * u.mag.to(u.mmag)
            return mag

100 

101 

class FractionalDifference(VectorAction):
    """Compute the fractional difference ``(A - B) / B`` of two vectors, each
    supplied by its own configurable action.
    """

    actionA = ConfigurableActionField(doc="Action which supplies vector A", dtype=VectorAction)
    actionB = ConfigurableActionField(doc="Action which supplies vector B", dtype=VectorAction)

    def getInputSchema(self) -> KeyedDataSchema:
        # Union of what both sub-actions need, A's inputs first.
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        numerator = self.actionA(data, **kwargs)  # type: ignore
        reference = self.actionB(data, **kwargs)  # type: ignore
        difference = numerator - reference
        return difference / reference

116 

117 

class Sn(VectorAction):
    """Compute signal-to-noise in the given flux type"""

    fluxType = Field[str](doc="Flux type to calculate the S/N in.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify uncertainty column", default="Err"
    )
    band = Field[str](doc="Band to calculate the S/N in.", default="i")

    def getInputSchema(self) -> KeyedDataSchema:
        # Flux column template, then the same template with the uncertainty
        # suffix appended.
        fluxCol = self.fluxType
        yield fluxCol, Vector
        yield f"{fluxCol}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the S/N as the flux divided by its uncertainty.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux column and its uncertainty column.
        **kwargs
            Values used to fill the column-name templates. A ``band`` entry,
            if present, is overridden by the configured ``band`` when the
            flux column name is built.

        Returns
        -------
        result : `Vector`
            Computed signal-to-noise ratio.
        """
        # The configured band takes precedence over any band in kwargs.
        formatArgs = {**kwargs, "band": self.band}
        fluxCol = self.fluxType.format(**formatArgs)
        suffix = self.uncertaintySuffix.format(**kwargs)
        errCol = fluxCol + suffix
        ratio = cast(Vector, data[fluxCol]) / data[errCol]  # type: ignore
        return np.array(cast(Vector, ratio))

146 

147 

class LoadVector(VectorAction):
    """Fetch a single Vector from KeyedData and return it as a numpy array."""

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # vectorKey is treated as a format template filled in from kwargs.
        key = self.vectorKey.format(**kwargs)
        loaded = cast(Vector, data[key])
        return np.array(loaded)

158 

159 

class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes;
    each magnitude is derived from a flux column.

    Returns
    -------
    The magnitude difference ``mag(col1) - mag(col2)`` as a numpy array,
    converted to milli mags when ``returnMillimags`` is set (the default).

    Notes
    -----
    Each flux column is tagged with its configured unit (``fluxUnits1`` and
    ``fluxUnits2``) and converted to AB magnitudes with astropy, so the units
    must be convertible to janskies. This action doesn't have any calibration
    information and assumes that the fluxes are already calibrated.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.col1, Vector), (self.col2, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # Convert both flux columns to AB magnitudes, col1 first.
        mags = []
        for column, units in ((self.col1, self.fluxUnits1), (self.col2, self.fluxUnits2)):
            flux = np.array(data[column.format(**kwargs)]) * u.Unit(units)
            mags.append(flux.to(u.ABmag))

        magDiff = mags[0] - mags[1]
        if self.returnMillimags:
            magDiff = magDiff.to(u.mmag)

        # Strip the astropy unit and hand back a bare array.
        return np.array(magDiff.value)

200 

201 

class SNCalculator(VectorAction):
    """Compute signal-to-noise as a flux column divided by its uncertainty
    column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        yield self.fluxType, Vector
        yield f"{self.fluxType}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # Both column names are format templates filled in from kwargs; the
        # uncertainty column is the flux template plus the suffix.
        signalKey = self.fluxType.format(**kwargs)
        signal = np.array(data[signalKey])
        noiseTemplate = self.fluxType + self.uncertaintySuffix
        noise = np.array(data[noiseTemplate.format(**kwargs)])
        return np.array(signal / noise)

220 

221 

class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for extinction

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.

    Notes
    -----
    The flux column names are filled in from the call's keyword arguments
    before the band prefix is extracted, so templated names such as
    ``{band}_psfFlux`` resolve to a real band.

    If no coefficients are configured, or either band is missing from them, a
    warning is logged and the uncorrected magnitude difference is returned.

    This action reads ``col1``, ``col2`` and ``returnMillimags`` from the
    configured ``magDiff`` action, so that action must provide them.
    """

    magDiff = ConfigurableActionField(
        doc="Action that returns a difference in magnitudes", default=MagDiff, dtype=VectorAction
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band. "
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # The sub-action may return any iterable (several actions in this
        # module are generators), so materialise it before concatenating.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the magnitude difference with the extinction term removed.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the inputs of ``magDiff`` and the E(B-V) column.
        **kwargs
            Forwarded to ``magDiff`` and used to fill the flux column name
            templates when deriving bands.

        Returns
        -------
        result : `Vector`
            ``diff - (coeff[band1] - coeff[band2]) * E(B-V)``, or the plain
            ``diff`` when the correction cannot be applied.
        """
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.warning("No extinction Coefficients. Not applying extinction correction")
            return diff

        # Explicit bands win; otherwise take the prefix of the *formatted*
        # column name so that templated names yield a real band.
        col1Band = self.band1 if self.band1 else self.magDiff.col1.format(**kwargs).split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.format(**kwargs).split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Keep the correction in the same unit as the difference it is
        # subtracted from.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)

286 

287 

class AstromDiff(VectorAction):
    """Calculate the difference between two angle columns.

    Each column is tagged with its configured unit (``radecUnits1`` and
    ``radecUnits2``, degrees by default) before ``col1 - col2`` is formed.

    Returns
    -------
    angleDiffValue : `np.ndarray`
        The difference in milliarcseconds when ``returnMilliArcsecs`` is set
        (the default); otherwise the bare value of the difference in
        whatever unit astropy assigns to it.

    Notes
    -----
    The columns need to be in units that can be converted to arcseconds.
    This action doesn't have any calibration information and assumes that
    the positions are already calibrated.
    """

    col1 = Field[str](doc="Column to subtract from", dtype=str)
    radecUnits1 = Field[str](doc="Units for col1", dtype=str, default="degree")
    col2 = Field[str](doc="Column to subtract", dtype=str)
    radecUnits2 = Field[str](doc="Units for col2", dtype=str, default="degree")
    returnMilliArcsecs = Field[bool](doc="Use marcseconds or not?", dtype=bool, default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.col1, Vector), (self.col2, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        minuend = np.array(data[self.col1.format(**kwargs)]) * u.Unit(self.radecUnits1)
        subtrahend = np.array(data[self.col2.format(**kwargs)]) * u.Unit(self.radecUnits2)
        separation = minuend - subtrahend

        if not self.returnMilliArcsecs:
            return separation.value
        # arcsec -> milliarcsec
        return separation.to(u.arcsec).value * 1000

329 

330 

class PerGroupStatistic(VectorAction):
    """Aggregate a vector within groups and return one value per group.

    The vector produced by ``buildAction`` is grouped by the ``groupKey``
    column and reduced with ``func``, which is handed to pandas'
    ``aggregate`` as a function name string.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of the build action, plus the grouping column.
        buildSchema = tuple(self.buildAction.getInputSchema())
        return buildSchema + ((self.groupKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        columns = {"groupKey": data[self.groupKey], "value": self.buildAction(data, **kwargs)}
        grouped = pd.DataFrame(columns).groupby("groupKey")
        perGroup = grouped["value"].aggregate(self.func)
        # Drop the group index; only the aggregated values are returned.
        return np.array(perGroup)