Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 44%

184 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-31 12:05 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "DownselectVector", 

25 "MultiCriteriaDownselectVector", 

26 "MagColumnNanoJansky", 

27 "FractionalDifference", 

28 "Sn", 

29 "ConstantValue", 

30 "SubtractVector", 

31 "DivideVector", 

32 "LoadVector", 

33 "MagDiff", 

34 "SNCalculator", 

35 "ExtinctionCorrectedMagDiff", 

36 "AstromDiff", 

37 "PerGroupStatistic", 

38) 

39 

40import logging 

41from typing import Optional, cast 

42 

43import numpy as np 

44import pandas as pd 

45from astropy import units as u 

46from lsst.pex.config import DictField, Field 

47from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField 

48 

49from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction 

50from .selectors import VectorSelector 

51 

52_LOG = logging.getLogger(__name__) 

53 

54 

class DownselectVector(VectorAction):
    """Load a vector from KeyedData and keep only the elements picked out by
    a selector action.

    The selector is called with the same data and keyword arguments as this
    action, and its result is used to index the loaded vector.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField(doc="Action which returns a selection mask", default=VectorSelector)

    def getInputSchema(self) -> KeyedDataSchema:
        # The vector to be down-selected, followed by everything the selector
        # itself needs to read.
        yield (self.vectorKey, Vector)
        selectorAction = cast(VectorAction, self.selector)
        for entry in selectorAction.getInputSchema():
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        selectorAction = cast(VectorAction, self.selector)
        selection = selectorAction(data, **kwargs)
        # The key is a template; fill it in from the keyword arguments.
        resolvedKey = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[resolvedKey])
        return vector[selection]

71 

72 

class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.

    Notes
    -----
    The masks returned by the individual selectors are never modified; the
    combined mask is always a new object. If no selectors are configured the
    vector is returned without any selection applied.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # The vector to be down-selected, followed by the inputs of every
        # configured selector.
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from cast(VectorAction, action).getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Apply all selectors and return the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector and the selector inputs are read.
        **kwargs
            Values used to format ``vectorKey``; also forwarded to every
            selector.

        Returns
        -------
        result : `Vector`
            The vector restricted to elements accepted by every selector.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            if mask is None:
                mask = subMask
            else:
                # Build a new array rather than using an in-place operator,
                # which would overwrite the first selector's returned mask.
                mask = np.logical_and(mask, subMask)  # type: ignore
        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # Nothing to select on; indexing with None would not be a
            # selection at all.
            return vector
        return vector[mask]

98 

99 

class MagColumnNanoJansky(VectorAction):
    """Convert a flux column, interpreted as nanojanskys, into AB magnitudes.

    The result is multiplied by 1000 when ``returnMillimags`` is set.
    """

    vectorKey = Field[str](doc="column key to use for this transformation")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the magnitudes for the configured column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the flux column is read.
        **kwargs
            Values used to format ``vectorKey``.

        Returns
        -------
        mags : `Vector`
            AB magnitudes (scaled by 1000 if ``returnMillimags`` is set).
        """
        # Use the standard library module directly: ``np.warnings`` was only
        # an accidental alias of it and is not available in newer NumPy.
        import warnings

        with warnings.catch_warnings():
            # The conversion emits these warnings for some flux values;
            # silence them for the duration of the conversion.
            warnings.filterwarnings("ignore", r"invalid value encountered")
            warnings.filterwarnings("ignore", r"divide by zero")
            vec = cast(Vector, data[self.vectorKey.format(**kwargs)])
            mags = (np.array(vec) * u.nJy).to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags

116 

117 

class FractionalDifference(VectorAction):
    """Compute the fractional difference (A-B)/B of the vectors produced by
    two actions.
    """

    actionA = ConfigurableActionField(doc="Action which supplies vector A", dtype=VectorAction)
    actionB = ConfigurableActionField(doc="Action which supplies vector B", dtype=VectorAction)

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of A first, then inputs of B.
        for entry in self.actionA.getInputSchema():  # type: ignore
            yield entry
        for entry in self.actionB.getInputSchema():  # type: ignore
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        first = self.actionA(data, **kwargs)  # type: ignore
        reference = self.actionB(data, **kwargs)  # type: ignore
        difference = first - reference
        return difference / reference

132 

133 

class Sn(VectorAction):
    """Compute the signal-to-noise ratio of a flux column, i.e. the flux
    divided by its uncertainty column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N in.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify uncertainty column", default="Err"
    )
    band = Field[str](doc="Band to calculate the S/N in.", default="i")

    def getInputSchema(self) -> KeyedDataSchema:
        fluxCol = self.fluxType
        yield fluxCol, Vector
        yield f"{fluxCol}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute S/N for the configured flux type.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the flux and uncertainty columns are read.
        **kwargs
            Values used to format the column names. The configured ``band``
            takes precedence over any ``band`` supplied here when formatting
            the flux column name.

        Returns
        -------
        result : `Vector`
            Computed signal-to-noise ratio.
        """
        # The configured band overrides a band passed in through kwargs.
        formatArgs = kwargs | dict(band=self.band)
        fluxCol = self.fluxType.format(**formatArgs)
        errSuffix = self.uncertaintySuffix.format(**kwargs)
        errCol = f"{fluxCol}{errSuffix}"
        flux = cast(Vector, data[fluxCol])
        ratio = flux / data[errCol]  # type: ignore

        return np.array(cast(Vector, ratio))

162 

163 

class ConstantValue(VectorAction):
    """Return the configured constant wrapped in a one-element array.

    No input data is read.
    """

    value = Field[float](doc="A single constant value", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        # Nothing is required from the input data.
        return ()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        constant = [self.value]
        return np.array(constant)

174 

175 

class SubtractVector(VectorAction):
    """Compute the difference (A-B) of the vectors produced by two actions."""

    actionA = ConfigurableActionField(doc="Action which supplies vector A", dtype=VectorAction)
    actionB = ConfigurableActionField(doc="Action which supplies vector B", dtype=VectorAction)

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of A first, then inputs of B.
        for entry in self.actionA.getInputSchema():  # type: ignore
            yield entry
        for entry in self.actionB.getInputSchema():  # type: ignore
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        minuend = self.actionA(data, **kwargs)  # type: ignore
        subtrahend = self.actionB(data, **kwargs)  # type: ignore
        difference = minuend - subtrahend
        return difference

190 

191 

class DivideVector(VectorAction):
    """Compute the ratio (A/B) of the vectors produced by two actions."""

    actionA = ConfigurableActionField(doc="Action which supplies vector A", dtype=VectorAction)
    actionB = ConfigurableActionField(doc="Action which supplies vector B", dtype=VectorAction)

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of the numerator action first, then the denominator action.
        for entry in self.actionA.getInputSchema():  # type: ignore
            yield entry
        for entry in self.actionB.getInputSchema():  # type: ignore
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        numerator = self.actionA(data, **kwargs)  # type: ignore
        denominator = self.actionB(data, **kwargs)  # type: ignore
        quotient = numerator / denominator
        return quotient

206 

207 

class LoadVector(VectorAction):
    """Read a single Vector out of KeyedData and return it as a numpy
    array.
    """

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # The key is a template; fill it in from the keyword arguments.
        resolvedKey = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[resolvedKey])
        return np.array(vector)

218 

219 

class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes, each derived from a
    flux column.

    Notes
    -----
    Each flux column is interpreted in the units named by ``fluxUnits1`` /
    ``fluxUnits2`` and converted to AB magnitudes before subtracting. No
    calibration is applied here, so the fluxes are assumed to be already
    calibrated. The difference is returned in millimags when
    ``returnMillimags`` is set, otherwise in mags.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.col1, Vector), (self.col2, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the magnitude difference ``col1 - col2``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which both flux columns are read.
        **kwargs
            Values used to format the column names.

        Returns
        -------
        result : `Vector`
            The magnitude difference as a plain numpy array.
        """
        # Convert each flux column to AB magnitudes, first column first.
        magnitudes = []
        for column, units in ((self.col1, self.fluxUnits1), (self.col2, self.fluxUnits2)):
            flux = np.array(data[column.format(**kwargs)]) * u.Unit(units)
            magnitudes.append(flux.to(u.ABmag))
        firstMag, secondMag = magnitudes

        difference = firstMag - secondMag
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        return np.array(difference.value)

260 

261 

class SNCalculator(VectorAction):
    """Compute the signal-to-noise ratio as a flux column divided by its
    uncertainty column.
    """

    fluxType = Field[str](doc="Flux type to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        fluxCol = self.fluxType
        yield fluxCol, Vector
        yield f"{fluxCol}{self.uncertaintySuffix}", Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        fluxKey = self.fluxType.format(**kwargs)
        signal = np.array(data[fluxKey])
        # The uncertainty column name is the flux template plus the suffix,
        # formatted as a whole.
        errTemplate = f"{self.fluxType}{self.uncertaintySuffix}"
        noise = np.array(data[errTemplate.format(**kwargs)])

        return np.array(signal / noise)

280 

281 

class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for extinction

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.

    Notes
    -----
    If no extinction coefficients are configured, or a required band has no
    coefficient, a warning is logged and the uncorrected magnitude difference
    is returned.
    """

    magDiff = ConfigurableActionField(
        doc="Action that returns a difference in magnitudes", default=MagDiff, dtype=VectorAction
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # tuple() allows magDiff to be any action, including one whose
        # getInputSchema is a generator and so cannot be concatenated with +.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)  # type: ignore

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the extinction-corrected magnitude difference.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the flux columns and the E(B-V) column are
            read.
        **kwargs
            Values used to format column names; forwarded to ``magDiff``.

        Returns
        -------
        result : `Vector`
            The magnitude difference with the extinction correction
            subtracted, or the plain difference if no correction could be
            applied.
        """
        diff = self.magDiff(data, **kwargs)  # type: ignore
        if not self.extinctionCoeffs:
            _LOG.warning("No extinction Coefficients. Not applying extinction correction")
            return diff

        # Column names may be templates such as "{band}_psfFlux"; format them
        # before taking the prefix so that the actual band is extracted and
        # not the literal placeholder.
        col1Band = self.band1 if self.band1 else self.magDiff.col1.format(**kwargs).split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.format(**kwargs).split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Keep the correction in the same units as the difference itself.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)

346 

347 

class AstromDiff(VectorAction):
    """Calculate the difference between two angle columns.

    Notes
    -----
    Each column is interpreted in the units named by ``radecUnits1`` /
    ``radecUnits2``. When ``returnMilliArcsecs`` is set the difference is
    converted to arcseconds and multiplied by 1000; otherwise the value of
    the difference is returned without any conversion. No calibration is
    applied here, so the positions are assumed to be already calibrated.
    """

    col1 = Field[str](doc="Column to subtract from", dtype=str)
    radecUnits1 = Field[str](doc="Units for col1", dtype=str, default="degree")
    col2 = Field[str](doc="Column to subtract", dtype=str)
    radecUnits2 = Field[str](doc="Units for col2", dtype=str, default="degree")
    returnMilliArcsecs = Field[bool](doc="Use marcseconds or not?", dtype=bool, default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.col1, Vector), (self.col2, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the angular difference ``col1 - col2``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which both angle columns are read.
        **kwargs
            Values used to format the column names.

        Returns
        -------
        angleDiffValue : `np.ndarray`
            The difference between the two columns, either in
            milliarcseconds or unconverted.
        """
        firstAngle = np.array(data[self.col1.format(**kwargs)]) * u.Unit(self.radecUnits1)
        secondAngle = np.array(data[self.col2.format(**kwargs)]) * u.Unit(self.radecUnits2)
        separation = firstAngle - secondAngle

        if not self.returnMilliArcsecs:
            return separation.value
        return separation.to(u.arcsec).value * 1000

389 

390 

class PerGroupStatistic(VectorAction):
    """Compute a statistic separately for each group of rows and return a
    vector holding one value per group.

    The statistic is named by ``func`` and is handed as a string to the
    pandas group-by ``aggregate`` method, so any function name accepted
    there can be used.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of the build action, plus the column used to form groups.
        buildSchema = tuple(self.buildAction.getInputSchema())
        groupSchema = ((self.groupKey, Vector),)
        return buildSchema + groupSchema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        groupLabels = data[self.groupKey]
        values = self.buildAction(data, **kwargs)
        table = pd.DataFrame({"groupKey": groupLabels, "value": values})
        perGroup = table.groupby("groupKey")["value"].aggregate(self.func)
        return np.array(perGroup)