Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 44%

184 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 11:55 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "DownselectVector", 

25 "MultiCriteriaDownselectVector", 

26 "MagColumnNanoJansky", 

27 "FractionalDifference", 

28 "Sn", 

29 "ConstantValue", 

30 "SubtractVector", 

31 "DivideVector", 

32 "LoadVector", 

33 "MagDiff", 

34 "SNCalculator", 

35 "ExtinctionCorrectedMagDiff", 

36 "AstromDiff", 

37 "PerGroupStatistic", 

38) 

39 

40import logging 

41from typing import Optional, cast 

42 

43import numpy as np 

44import pandas as pd 

45from astropy import units as u 

46from lsst.pex.config import DictField, Field 

47from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField 

48 

49from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction 

50from .selectors import VectorSelector 

51 

52_LOG = logging.getLogger(__name__) 

53 

54 

class DownselectVector(VectorAction):
    """Load a vector from KeyedData and keep only the elements picked out by
    the configured selector action.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField[VectorAction](
        doc="Action which returns a selection mask", default=VectorSelector
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the key of the vector to load, followed by every entry the
        selector action declares in its own input schema.
        """
        yield (self.vectorKey, Vector)
        selectorAction = cast(VectorAction, self.selector)
        yield from selectorAction.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the vector stored under ``vectorKey`` (formatted with
        ``kwargs``), indexed by the result of calling the selector on ``data``.
        """
        selectorAction = cast(VectorAction, self.selector)
        keep = selectorAction(data, **kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return column[keep]

73 

74 

class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the key of the vector to load, followed by the input schema
        entries of every configured selector.
        """
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Combine the masks of all selectors and apply the result.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector named by ``vectorKey`` and whatever
            the selectors read.
        **kwargs
            Passed to every selector and used to format ``vectorKey``.

        Returns
        -------
        result : `Vector`
            The loaded vector indexed by the product of all selector masks.
            When no selectors are configured the vector is returned
            unfiltered.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            if mask is None:
                mask = subMask
            else:
                # Build a new array rather than multiplying in place: the
                # first mask may be an object owned by ``data`` or by the
                # selector, and must not be modified as a side effect.
                mask = mask * subMask  # type: ignore
        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors configured; indexing with None would add an axis
            # instead of selecting rows, so hand back the vector as loaded.
            return vector
        return vector[mask]

100 

101 

class MagColumnNanoJansky(VectorAction):
    """Turn nano janskies into magnitudes."""

    vectorKey = Field[str](doc="column key to use for this transformation")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return a schema holding only the configured flux column."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Convert the configured flux column to AB magnitudes.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column named by ``vectorKey``; its values
            are interpreted as nanojanskys.
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        mags : `Vector`
            AB magnitudes, multiplied by 1000 when ``returnMillimags`` is
            set.
        """
        # ``np.warnings`` was an undocumented alias of the standard library
        # module and was removed in NumPy 1.24; use the real module instead.
        import warnings

        with warnings.catch_warnings():
            # Silence the floating-point warnings raised during the
            # conversion (presumably for non-positive fluxes).
            warnings.filterwarnings("ignore", r"invalid value encountered")
            warnings.filterwarnings("ignore", r"divide by zero")
            vec = cast(Vector, data[self.vectorKey.format(**kwargs)])
            mags = (np.array(vec) * u.nJy).to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags

120 

121 

class FractionalDifference(VectorAction):
    """Compute the fractional difference (A-B)/B of two vectors, each
    produced by its own configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions on ``data`` and return (A-B)/B."""
        numeratorSource = self.actionA(data, **kwargs)  # type: ignore
        reference = self.actionB(data, **kwargs)  # type: ignore
        difference = numeratorSource - reference
        return difference / reference

136 

137 

class Sn(VectorAction):
    """Signal-to-noise ratio of a flux column and its uncertainty column."""

    fluxType = Field[str](doc="Flux type to calculate the S/N in.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify uncertainty column", default="Err"
    )
    band = Field[str](doc="Band to calculate the S/N in.", default="i")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the (unformatted) flux column name and the matching
        uncertainty column name.
        """
        fluxTemplate = self.fluxType
        yield fluxTemplate, Vector
        yield f"{fluxTemplate}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Divide the flux column by its uncertainty column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux and uncertainty columns.
        **kwargs
            Used to format the column names; the configured ``band`` takes
            precedence over any ``band`` keyword when formatting the flux
            column name.

        Returns
        -------
        result : `Vector`
            Computed signal-to-noise ratio.
        """
        formatArgs = {**kwargs, "band": self.band}
        fluxCol = self.fluxType.format(**formatArgs)
        suffix = self.uncertaintySuffix.format(**kwargs)
        errCol = fluxCol + suffix
        ratio = cast(Vector, data[fluxCol]) / data[errCol]  # type: ignore

        return np.array(cast(Vector, ratio))

168 

169 

class ConstantValue(VectorAction):
    """Produce a one-element vector holding a configured constant."""

    value = Field[float](doc="A single constant value", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return an empty schema; nothing is read from the input data."""
        return tuple()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the configured value wrapped in a length-one array;
        ``data`` and ``kwargs`` are not used.
        """
        constant = self.value
        return np.array([constant])

180 

181 

class SubtractVector(VectorAction):
    """Compute the difference A-B of two vectors, each produced by its own
    configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions on ``data`` and return A minus B."""
        minuend = self.actionA(data, **kwargs)  # type: ignore
        subtrahend = self.actionB(data, **kwargs)  # type: ignore
        return minuend - subtrahend

196 

197 

class DivideVector(VectorAction):
    """Compute the ratio A/B of two vectors, each produced by its own
    configurable action.
    """

    actionA = ConfigurableActionField[VectorAction](doc="Action which supplies vector A")
    actionB = ConfigurableActionField[VectorAction](doc="Action which supplies vector B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of action A followed by those of action B."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate both actions on ``data`` and return A divided by B."""
        dividend = self.actionA(data, **kwargs)  # type: ignore
        divisor = self.actionB(data, **kwargs)  # type: ignore
        return dividend / divisor

212 

213 

class LoadVector(VectorAction):
    """Fetch a single vector from KeyedData and return it as a numpy array."""

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return a schema holding only the configured key."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Look up ``vectorKey`` (formatted with ``kwargs``) in ``data`` and
        return the value converted with `numpy.array`.
        """
        key = self.vectorKey.format(**kwargs)
        loaded = cast(Vector, data[key])
        return np.array(loaded)

224 

225 

class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes;
    each magnitude is derived from a flux column.

    Parameters
    ----------
    data : `KeyedData`
        Mapping holding the two flux columns named by ``col1`` and ``col2``.
    **kwargs
        Used to format the two column names.

    Returns
    -------
    result : `np.ndarray`
        The magnitude difference (col1 minus col2), converted to millimags
        when ``returnMillimags`` is set.

    Notes
    -----
    The flux columns need to be in units (specifiable in
    the fluxUnits1 and 2 config options) that can be converted
    to AB magnitudes. This action doesn't have any calibration
    information and assumes that the fluxes are already
    calibrated.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return a schema holding the two configured flux columns."""
        first = (self.col1, Vector)
        second = (self.col2, Vector)
        return (first, second)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # Attach the configured unit to each flux column, then convert to AB mag
        firstFlux = np.array(data[self.col1.format(**kwargs)]) * u.Unit(self.fluxUnits1)
        firstMag = firstFlux.to(u.ABmag)

        secondFlux = np.array(data[self.col2.format(**kwargs)]) * u.Unit(self.fluxUnits2)
        secondMag = secondFlux.to(u.ABmag)

        difference = firstMag - secondMag
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        # Strip the astropy unit and hand back a plain array
        return np.array(difference.value)

266 

267 

class SNCalculator(VectorAction):
    """Signal-to-noise ratio computed from a flux column and the matching
    uncertainty column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the (unformatted) flux column name and the uncertainty
        column name built by appending ``uncertaintySuffix``.
        """
        fluxTemplate = self.fluxType
        yield fluxTemplate, Vector
        yield f"{fluxTemplate}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return flux divided by uncertainty as a numpy array; both column
        names are formatted with ``kwargs`` before lookup.
        """
        fluxName = self.fluxType.format(**kwargs)
        flux = np.array(data[fluxName])
        errName = f"{self.fluxType}{self.uncertaintySuffix}".format(**kwargs)
        fluxErr = np.array(data[errName])

        return np.array(flux / fluxErr)

286 

287 

class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for extinction

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.
    """

    magDiff = ConfigurableActionField[VectorAction](
        doc="Action that returns a difference in magnitudes", default=MagDiff
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the magnitude-difference action plus the
        E(B-V) column.
        """
        # The sub-action's schema may be a generator rather than a tuple, so
        # materialise it before concatenating.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the magnitude difference with the extinction term removed.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the columns needed by ``magDiff`` and the
            E(B-V) column named by ``ebvCol``.
        **kwargs
            Passed through to the ``magDiff`` action.

        Returns
        -------
        result : `Vector`
            The output of ``magDiff`` minus ``(coeff1 - coeff2) * E(B-V)``.
            The uncorrected difference is returned, with a logged warning,
            when no coefficients are configured or either band is missing
            from them.
        """
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.warning("No extinction Coefficients. Not applying extinction correction")
            return diff

        # An explicitly configured band wins over the column-name prefix
        col1Band = self.band1 if self.band1 else self.magDiff.col1.split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Match the units of the correction to those of the difference
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)

352 

353 

class AstromDiff(VectorAction):
    """Calculate the difference between two angle columns and optionally
    convert the difference to milliarcseconds.

    Parameters
    ----------
    data : `KeyedData`
        Mapping holding the two columns named by ``col1`` and ``col2``.
    **kwargs
        Used to format the two column names.

    Returns
    -------
    angleDiffValue : `np.ndarray`
        The difference between two columns, either in the input units or in
        milliarcseconds.

    Notes
    -----
    The columns need to be in units (specifiable in the radecUnits1 and 2
    config options) that can be converted to arcseconds. This action doesn't
    have any calibration information and assumes that the positions are already
    calibrated.
    """

    col1 = Field[str](doc="Column to subtract from", dtype=str)
    radecUnits1 = Field[str](doc="Units for col1", dtype=str, default="degree")
    col2 = Field[str](doc="Column to subtract", dtype=str)
    radecUnits2 = Field[str](doc="Units for col2", dtype=str, default="degree")
    returnMilliArcsecs = Field[bool](doc="Use marcseconds or not?", dtype=bool, default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return a schema holding the two configured angle columns."""
        first = (self.col1, Vector)
        second = (self.col2, Vector)
        return (first, second)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # Attach the configured unit to each column before subtracting
        firstAngle = np.array(data[self.col1.format(**kwargs)]) * u.Unit(self.radecUnits1)
        secondAngle = np.array(data[self.col2.format(**kwargs)]) * u.Unit(self.radecUnits2)
        separation = firstAngle - secondAngle

        if not self.returnMilliArcsecs:
            # Leave the difference in whatever unit the subtraction produced
            return separation.value
        # arcsec -> milliarcsec
        return separation.to(u.arcsec).value * 1000

398 

399 

class PerGroupStatistic(VectorAction):
    """Aggregate a vector within groups and return one value per group.

    The statistic is given as the string name of any function accepted by
    pandas DataFrameGroupBy.aggregate.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField[VectorAction](doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the build action plus the grouping column."""
        buildSchema = tuple(self.buildAction.getInputSchema())
        groupEntry = (self.groupKey, Vector)
        return buildSchema + (groupEntry,)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Group the output of ``buildAction`` by the ``groupKey`` column,
        apply ``func`` to each group and return the results as an array.
        """
        groupLabels = data[self.groupKey]
        values = self.buildAction(data, **kwargs)
        table = pd.DataFrame({"groupKey": groupLabels, "value": values})
        perGroup = table.groupby("groupKey")["value"].aggregate(self.func)
        return np.array(perGroup)