Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 44%

184 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-14 03:19 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public names exported by this module; each entry is a VectorAction
# subclass defined below.
__all__ = (
    "DownselectVector",
    "MultiCriteriaDownselectVector",
    "MagColumnNanoJansky",
    "FractionalDifference",
    "Sn",
    "ConstantValue",
    "SubtractVector",
    "DivideVector",
    "LoadVector",
    "MagDiff",
    "SNCalculator",
    "ExtinctionCorrectedMagDiff",
    "AstromDiff",
    "PerGroupStatistic",
)

39 

40import logging 

41from typing import Optional, cast 

42 

43import numpy as np 

44import pandas as pd 

45from astropy import units as u 

46from lsst.pex.config import DictField, Field 

47from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField 

48 

49from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction 

50from .selectors import VectorSelector 

51 

# Module-level logger, named after this module; used below to warn when an
# extinction correction cannot be applied.
_LOG = logging.getLogger(__name__)

53 

54 

class DownselectVector(VectorAction):
    """Load one vector from KeyedData and return only the elements picked
    out by a configurable selector action.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField[VectorAction](
        doc="Action which returns a selection mask", default=VectorSelector
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the vector key, then every input the selector requires."""
        yield (self.vectorKey, Vector)
        selectorAction = cast(VectorAction, self.selector)
        yield from selectorAction.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Apply the selector's mask to the configured vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector and the selector's inputs.
        **kwargs
            Forwarded to the selector and used to format ``vectorKey``.

        Returns
        -------
        result : `Vector`
            The vector indexed by the selector's mask.
        """
        selectorAction = cast(VectorAction, self.selector)
        # The selector runs before the column lookup, as in the original.
        keep = selectorAction(data, **kwargs)
        columnName = self.vectorKey.format(**kwargs)
        column = cast(Vector, data[columnName])
        return column[keep]

73 

74 

class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the vector key followed by the inputs of every selector."""
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Combine all selector masks and apply the result to the vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector and every selector's inputs.
        **kwargs
            Forwarded to each selector and used to format ``vectorKey``.

        Returns
        -------
        result : `Vector`
            The vector restricted to elements accepted by every selector.
            When no selectors are configured the vector is returned
            unchanged.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            # Combine out of place: an in-place ``*=`` would modify the array
            # returned by the first selector, which may alias caller data.
            mask = subMask if mask is None else mask * subMask  # type: ignore

        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors were configured. Indexing with ``None`` would not
            # select anything meaningful, so hand back the whole vector.
            return vector
        return vector[mask]

100 

101 

class MagColumnNanoJansky(VectorAction):
    """Convert a flux column, interpreted as nanojanskys, into AB
    magnitudes (optionally scaled by 1000 to give millimags).
    """

    vectorKey = Field[str](doc="column key to use for this transformation")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single (key, type) pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute AB magnitudes for the configured flux column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux column.
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        mags : `Vector`
            Magnitudes, multiplied by 1000 when ``returnMillimags`` is set.
        """
        # Use the standard-library module directly: ``np.warnings`` was only
        # an accidental alias and newer NumPy releases no longer expose it.
        import warnings

        with warnings.catch_warnings():
            # Silence the numeric warnings raised during the conversion
            # (presumably triggered by non-positive or NaN fluxes).
            warnings.filterwarnings("ignore", r"invalid value encountered")
            warnings.filterwarnings("ignore", r"divide by zero")
            vec = cast(Vector, data[self.vectorKey.format(**kwargs)])
            mags = (np.array(vec) * u.nJy).to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags

118 

119 

class FractionalDifference(VectorAction):
    """Compute the fractional difference (A - B) / B of two vectors, each
    produced by its own configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the inputs of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions and return (A - B) / B.

        Parameters
        ----------
        data : `KeyedData`
            Mapping passed through to both actions.
        **kwargs
            Forwarded unchanged to both actions.

        Returns
        -------
        result : `Vector`
            The element-wise fractional difference.
        """
        first = self.actionA(data, **kwargs)  # type: ignore
        reference = self.actionB(data, **kwargs)  # type: ignore
        difference = first - reference
        return difference / reference

134 

135 

class Sn(VectorAction):
    """Signal-to-noise ratio of a flux column: the flux divided by its
    uncertainty column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N in.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify uncertainty column", default="Err"
    )
    band = Field[str](doc="Band to calculate the S/N in.", default="i")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the (unformatted) flux key and its uncertainty key."""
        fluxCol = self.fluxType
        yield fluxCol, Vector
        yield f"{fluxCol}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute S/N in ``self.fluxType``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux and flux-uncertainty columns.
        **kwargs
            Used to format the column names; any ``band`` entry is
            overridden by the configured ``band``.

        Returns
        -------
        result : `Vector`
            Computed signal-to-noise ratio.
        """
        # The configured band takes precedence over a band given in kwargs.
        formatArgs = {**kwargs, "band": self.band}
        fluxCol = self.fluxType.format(**formatArgs)
        suffix = self.uncertaintySuffix.format(**kwargs)
        errCol = fluxCol + suffix

        flux = cast(Vector, data[fluxCol])
        ratio = flux / data[errCol]  # type: ignore
        return np.array(cast(Vector, ratio))

164 

165 

class ConstantValue(VectorAction):
    """Produce a one-element vector holding a configured constant."""

    value = Field[float](doc="A single constant value", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return an empty schema; this action reads nothing from the data."""
        return tuple()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the configured value wrapped in a length-1 array.

        ``data`` and ``kwargs`` are accepted for interface compatibility but
        are not used.
        """
        constant = self.value
        return np.array([constant])

176 

177 

class SubtractVector(VectorAction):
    """Compute the difference A - B of two vectors, each produced by its own
    configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the inputs of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions on ``data`` and return A - B.

        ``kwargs`` are forwarded unchanged to both actions.
        """
        minuend = self.actionA(data, **kwargs)  # type: ignore
        subtrahend = self.actionB(data, **kwargs)  # type: ignore
        return minuend - subtrahend

192 

193 

class DivideVector(VectorAction):
    """Compute the ratio A / B of two vectors, each produced by its own
    configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the inputs of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions on ``data`` and return A / B.

        ``kwargs`` are forwarded unchanged to both actions.
        """
        numerator = self.actionA(data, **kwargs)  # type: ignore
        denominator = self.actionB(data, **kwargs)  # type: ignore
        return numerator / denominator

208 

209 

class LoadVector(VectorAction):
    """Fetch a single vector from KeyedData and return it as a numpy array."""

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single (key, type) pair this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Look up the formatted key in ``data`` and copy it into an array.

        ``kwargs`` are used to fill any format placeholders in ``vectorKey``.
        """
        key = self.vectorKey.format(**kwargs)
        loaded = cast(Vector, data[key])
        return np.array(loaded)

220 

221 

class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes, each derived from a
    flux column.

    Notes
    -----
    Each flux column is tagged with its configured unit (``fluxUnits1`` /
    ``fluxUnits2``) and converted to AB magnitudes; the units must therefore
    be convertible to AB magnitudes. No calibration is applied here, so the
    fluxes are assumed to be calibrated already.

    The result is ``mag(col1) - mag(col2)``, expressed in millimags when
    ``returnMillimags`` is set and left in the units of the raw difference
    otherwise.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the two flux columns this action reads."""
        return tuple((column, Vector) for column in (self.col1, self.col2))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the magnitude difference between ``col1`` and ``col2``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding both flux columns.
        **kwargs
            Used to format the two column names.

        Returns
        -------
        result : `Vector`
            Plain array of magnitude differences (unit stripped).
        """
        columnsAndUnits = (
            (self.col1, self.fluxUnits1),
            (self.col2, self.fluxUnits2),
        )
        mags = []
        for column, units in columnsAndUnits:
            flux = np.array(data[column.format(**kwargs)]) * u.Unit(units)
            mags.append(flux.to(u.ABmag))

        firstMag, secondMag = mags
        difference = firstMag - secondMag
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        return np.array(difference.value)

262 

263 

class SNCalculator(VectorAction):
    """Signal-to-noise ratio: a flux column divided by its uncertainty
    column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the (unformatted) flux key and its uncertainty key."""
        fluxKey = self.fluxType
        yield fluxKey, Vector
        yield f"{fluxKey}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Divide the flux column by the uncertainty column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux and flux-uncertainty columns.
        **kwargs
            Used to format both column names.

        Returns
        -------
        sn : `Vector`
            Element-wise signal-to-noise ratio as a numpy array.
        """
        fluxKey = self.fluxType.format(**kwargs)
        signal = np.array(data[fluxKey])

        # Suffix is appended before formatting, so placeholders in either
        # part are filled from the same kwargs.
        errTemplate = f"{self.fluxType}{self.uncertaintySuffix}"
        noise = np.array(data[errTemplate.format(**kwargs)])

        return np.array(signal / noise)

282 

283 

class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for
    extinction.

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.
    """

    magDiff = ConfigurableActionField[VectorAction](
        doc="Action that returns a difference in magnitudes", default=MagDiff
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the wrapped action's inputs plus the E(B-V) column."""
        # Materialise the wrapped schema first: it may be a generator rather
        # than a tuple, in which case ``+`` would raise a TypeError.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the magnitude difference and subtract the extinction
        correction ``(coeff[band1] - coeff[band2]) * E(B-V)``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the inputs of ``magDiff`` and the E(B-V) column.
        **kwargs
            Forwarded to the ``magDiff`` action.

        Returns
        -------
        result : `Vector`
            The corrected magnitude difference. The uncorrected difference
            is returned, with a logged warning, when no coefficients are
            configured or when either band has no coefficient.
        """
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.warning("No extinction Coefficients. Not applying extinction correction")
            return diff

        # An explicitly configured band wins over the column-name prefix.
        col1Band = self.band1 if self.band1 else self.magDiff.col1.split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Match the units of the wrapped difference before subtracting.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)

348 

349 

class AstromDiff(VectorAction):
    """Calculate the difference between two angular columns.

    Notes
    -----
    Each column is tagged with its configured unit (``radecUnits1`` /
    ``radecUnits2``, degrees by default). When ``returnMilliArcsecs`` is set
    the difference is converted to arcseconds and multiplied by 1000, so the
    units must be convertible to arcseconds. Otherwise the raw value of the
    difference is returned in whatever unit the subtraction yields
    (presumably that of ``col1`` -- confirm against astropy). No calibration
    is applied here, so the positions are assumed to be calibrated already.
    """

    col1 = Field[str](doc="Column to subtract from", dtype=str)
    radecUnits1 = Field[str](doc="Units for col1", dtype=str, default="degree")
    col2 = Field[str](doc="Column to subtract", dtype=str)
    radecUnits2 = Field[str](doc="Units for col2", dtype=str, default="degree")
    returnMilliArcsecs = Field[bool](doc="Use marcseconds or not?", dtype=bool, default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the two angle columns this action reads."""
        return tuple((column, Vector) for column in (self.col1, self.col2))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute ``col1 - col2`` as an angle difference.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding both angle columns.
        **kwargs
            Used to format the two column names.

        Returns
        -------
        angleDiffValue : `np.ndarray`
            The difference, in milliarcseconds when ``returnMilliArcsecs``
            is set, otherwise the unconverted value.
        """
        firstAngle = np.array(data[self.col1.format(**kwargs)]) * u.Unit(self.radecUnits1)
        secondAngle = np.array(data[self.col2.format(**kwargs)]) * u.Unit(self.radecUnits2)
        separation = firstAngle - secondAngle

        if not self.returnMilliArcsecs:
            return separation.value
        return separation.to(u.arcsec).value * 1000

391 

392 

class PerGroupStatistic(VectorAction):
    """Aggregate a vector within groups and return one value per group.

    The statistic is named by a string and handed to pandas'
    ``aggregate`` on the grouped values, so any function name accepted there
    may be used.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField[VectorAction](doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's inputs plus the grouping column."""
        schema = list(self.buildAction.getInputSchema())
        schema.append((self.groupKey, Vector))
        return tuple(schema)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Group the built vector by ``groupKey`` and aggregate each group.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the grouping column and the build action's
            inputs.
        **kwargs
            Forwarded to the build action.

        Returns
        -------
        result : `Vector`
            One aggregated value per group, as a numpy array.
        """
        groupLabels = data[self.groupKey]
        values = self.buildAction(data, **kwargs)
        table = pd.DataFrame({"groupKey": groupLabels, "value": values})
        perGroup = table.groupby("groupKey")["value"].aggregate(self.func)
        return np.array(perGroup)