Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 34%

187 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-01 04:53 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "MedianAction", 

26 "MeanAction", 

27 "StdevAction", 

28 "ValueAction", 

29 "SigmaMadAction", 

30 "CountAction", 

31 "CountUniqueAction", 

32 "ApproxFloor", 

33 "FracThreshold", 

34 "MaxAction", 

35 "MinAction", 

36 "FracInRange", 

37 "FracNan", 

38 "SumAction", 

39 "MedianHistAction", 

40 "IqrHistAction", 

41 "DivideScalar", 

42 "RmsAction", 

43) 

44 

45import operator 

46from math import nan 

47from typing import cast 

48 

49import numpy as np 

50from lsst.pex.config import ChoiceField, Field 

51from lsst.pex.config.configurableActions import ConfigurableActionField 

52 

53from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

54from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd 

55 

56 

class ScalarFromVectorAction(ScalarAction):
    """Base class for actions that reduce one named vector to a scalar.

    The only thing defined here is the configuration field naming the
    vector and the input schema derived from it.
    """

    vectorKey = Field[str]("Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return a one-entry schema pairing ``vectorKey`` with `Vector`."""
        required = (self.vectorKey, Vector)
        return (required,)

64 

65 

class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    The vector named by ``vectorKey`` (formatted with ``kwargs``) is indexed
    with the mask from ``getMask`` and passed to ``nanMedian``. NaN is
    returned when the selection is empty.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.nan`` instead of the ``np.NaN`` alias: the alias was removed
        # in NumPy 2.0 and would raise AttributeError on this branch.
        return nanMedian(values) if len(values) else np.nan

75 

76 

class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    The vector named by ``vectorKey`` (formatted with ``kwargs``) is indexed
    with the mask from ``getMask`` and passed to ``nanMean``. NaN is
    returned when the selection is empty.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.nan`` instead of the ``np.NaN`` alias: the alias was removed
        # in NumPy 2.0 and would raise AttributeError on this branch.
        return nanMean(values) if len(values) else np.nan

86 

87 

class StdevAction(ScalarFromVectorAction):
    """Standard deviation of the selected data, as computed by ``nanStd``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanStd(column[selection])

94 

95 

class RmsAction(ScalarFromVectorAction):
    """Calculates the root mean square of the given data (without subtracting
    the mean as in StdevAction).

    NaN entries are dropped before the computation. If nothing is left
    (empty selection or all-NaN input) NaN is returned.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        vector = data[self.vectorKey.format(**kwargs)][mask]
        vector = vector[~np.isnan(vector)]
        # Guard the empty case explicitly: np.mean of an empty array also
        # yields NaN, but only after emitting a RuntimeWarning.
        if len(vector) == 0:
            return np.nan

        return np.sqrt(np.mean(vector**2))

106 

107 

class ValueAction(ScalarFromVectorAction):
    """Returns element 0 of the vector, converted to ``float``.

    No mask is applied: the vector is read directly from ``data``.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        column = data[self.vectorKey.format(**kwargs)]
        first = float(column[0])
        return cast(Scalar, first)

113 

114 

class SigmaMadAction(ScalarFromVectorAction):
    """Sigma MAD of the selected data, as computed by ``nanSigmaMad``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanSigmaMad(column[selection])

121 

122 

class CountAction(ScalarAction):
    """Performs count actions, with threshold-based filtering.

    The operator is specified as a string, for example, "lt", "le", "ge",
    "gt", "ne", and "eq" for the mathematical operations <, <=, >=, >, !=,
    and == respectively. To count non-NaN values, only pass the column name
    as vector key (the defaults are op="ne" and threshold=nan). To count NaN
    values, pass threshold = nan (from math.nan) together with op="eq".
    Optionally to configure from a YAML file, pass "threshold: !!float nan".
    To compute the number of elements with values less than a given threshold,
    use op="lt" (or op="le" to also include values equal to the threshold).

    When the threshold is not NaN, NaN entries are removed before the
    comparison is applied.
    """

    vectorKey = Field[str]("Key of Vector to count")
    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "ne": "not equal to a given value",
            "eq": "equal to a given value",
            "gt": "greater than threshold",
        },
        default="ne",
    )
    threshold = Field[float](doc="Threshold to apply.", default=nan)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the entries of the selected vector that satisfy the test.

        Returns
        -------
        result : `Scalar`
            The count, as an ``int``.

        Raises
        ------
        ValueError
            Raised if the threshold is NaN and the operator is anything
            other than "eq" or "ne".
        """
        mask = self.getMask(**kwargs)
        arr = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        is_nan = np.isnan(arr)

        # NaN never compares equal to itself, so ``threshold == nan`` can
        # never be true; the NaN threshold has to be detected with isnan.
        if np.isnan(self.threshold):
            if self.op == "eq":
                # Count number of NaNs
                return cast(Scalar, int(is_nan.sum()))
            if self.op == "ne":
                # Count number of non-NaNs
                return cast(Scalar, int(len(arr) - is_nan.sum()))
            raise ValueError("Invalid operator for counting NaNs.")

        # Count for given threshold ignoring all NaNs
        compare = getattr(operator, self.op)
        return cast(Scalar, int(np.sum(compare(arr[~is_nan], self.threshold))))

176 

177 

class CountUniqueAction(ScalarFromVectorAction):
    """Number of distinct values in the selected column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))

186 

187 

class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the trailing tenth of the sorted input.

    The selected values are sorted with ``np.sort`` and the last
    ``len(values) // 10`` elements are passed to ``nanMedian``. With fewer
    than ten selected values that count is zero, so the slice ``[-0:]``
    covers the whole array.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][selection])  # type: ignore
        tail_len = len(ordered) // 10
        # NOTE(review): this class was previously described as using the
        # "lowest ten values", but the slice below takes the END of an
        # ascending sort and its length is a tenth of the input, not ten.
        # Confirm which behaviour is intended before changing either.
        tail = ordered[-tail_len:]
        return nanMedian(tail)

196 

197 

class FracThreshold(ScalarFromVectorAction):
    """Compute the fraction of a distribution above or below a threshold.

    The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than a given threshold,
    use op="lt" (or op="le" to also include values equal to the threshold).

    NaN entries are discarded first and do not count towards the
    denominator. NaN is returned if no values remain.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)
    use_absolute_value = Field[bool](
        doc=(
            "Calculate threshold after taking absolute value. If relative_to_median"
            " is true the absolute value will be applied after the median is subtracted"
        ),
        default=False,
    )

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of values passing the test."""
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            return np.nan
        # If relative_to_median is set, subtract the median from the values so
        # the threshold is effectively applied at median + threshold.
        if self.relative_to_median:
            offset = nanMedian(values)
            if np.isfinite(offset):
                # Out-of-place subtraction: ``values -= offset`` raises a
                # casting error when the vector has an integer dtype.
                values = values - offset
        if self.use_absolute_value:
            values = np.abs(values)
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, self.threshold)) / n_values),  # type: ignore
        )
        return 100.0 * result if self.percent else result

251 

252 

class MaxAction(ScalarFromVectorAction):
    """Maximum of the selected data, as computed by ``nanMax``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMax(column[selection])

259 

260 

class MinAction(ScalarFromVectorAction):
    """Minimum of the selected data, as computed by ``nanMin``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMin(column[selection])

267 

268 

class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range test is ``minimum <= value < maximum``. The denominator is the
    number of selected entries *including* NaNs. NaN is returned when the
    selection is empty.
    """

    # ``np.inf`` rather than the ``np.Inf`` alias, which NumPy 2.0 removed;
    # with the alias this class body fails to evaluate on NumPy >= 2.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of values inside the range."""
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Nothing selected: avoid ZeroDivisionError and report NaN, as
            # FracThreshold does for an empty input.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

292 

293 

class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN.

    NaN is returned when the selection is empty.
    """

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of selected entries that are NaN."""
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Nothing selected: avoid ZeroDivisionError and report NaN, as
            # FracThreshold does for an empty input.
            return np.nan
        n_nan = len(values[np.isnan(values)])
        result = cast(
            Scalar,
            float(n_nan / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

312 

313 

class SumAction(ScalarFromVectorAction):
    """Sum of the selected column, computed with ``np.nansum``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)

321 

322 

class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Median of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # First bin at which the cumulative count reaches half of the total.
        median_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 2)
        return bin_mid[median_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram median, or NaN when the histogram is empty."""
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0
            # removed. The midpoints are deliberately not looked up here.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))

363 

364 

class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Inter-quartile range of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        # First bins at which the cumulative count reaches 1/4 and 3/4 of
        # the total, respectively.
        liqr_index = np.searchsorted(cumulative_sum, total / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * total)
        return bin_mid[uiqr_index] - bin_mid[liqr_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram IQR, or NaN when the histogram is empty."""
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0
            # removed. The midpoints are deliberately not looked up here.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))

408 

409 

class DivideScalar(ScalarAction):
    """Divide the scalar produced by ``actionA`` by that of ``actionB``."""

    actionA = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar A")
    actionB = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of ``actionA`` followed by ``actionB``."""
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate both actions on ``data`` and return their quotient.

        Parameters
        ----------
        data : `KeyedData`
            Passed unchanged, together with ``kwargs``, to both actions.

        Returns
        -------
        result : `Scalar`
            The value from ``actionA`` divided by the value from ``actionB``.

        Raises
        ------
        ValueError
            Raised if the value from ``actionB`` equals zero.
        """
        numerator = self.actionA(data, **kwargs)
        denominator = self.actionB(data, **kwargs)
        if denominator != 0:
            return numerator / denominator
        raise ValueError("Denominator is zero!")