Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 35%

178 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-04 04:15 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "MedianAction", 

26 "MeanAction", 

27 "StdevAction", 

28 "ValueAction", 

29 "SigmaMadAction", 

30 "CountAction", 

31 "CountUniqueAction", 

32 "ApproxFloor", 

33 "FracThreshold", 

34 "MaxAction", 

35 "MinAction", 

36 "FracInRange", 

37 "FracNan", 

38 "SumAction", 

39 "MedianHistAction", 

40 "IqrHistAction", 

41 "DivideScalar", 

42) 

43 

44import operator 

45from math import nan 

46from typing import cast 

47 

48import numpy as np 

49from lsst.pex.config import ChoiceField, Field 

50from lsst.pex.config.configurableActions import ConfigurableActionField 

51 

52from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

53from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd 

54 

55 

class ScalarFromVectorAction(ScalarAction):
    """Base for actions that derive one scalar statistic from one vector.

    Subclasses supply ``__call__``; this class only declares which vector
    is required as input.
    """

    vectorKey = Field[str]("Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single ``(vectorKey, Vector)`` entry."""
        entry = (self.vectorKey, Vector)
        return (entry,)

63 

64 

class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    The vector is looked up under ``vectorKey`` (formatted with the call's
    keyword arguments), indexed with the mask returned by ``getMask`` and
    reduced with ``nanMedian``.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        median : `Scalar`
            Result of ``nanMedian`` on the selected values, or NaN when the
            selection is empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input path no longer raises.
        return nanMedian(values) if len(values) else np.nan

74 

75 

class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    The vector is looked up under ``vectorKey`` (formatted with the call's
    keyword arguments), indexed with the mask returned by ``getMask`` and
    reduced with ``nanMean``.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the mean of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        mean : `Scalar`
            Result of ``nanMean`` on the selected values, or NaN when the
            selection is empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input path no longer raises.
        return nanMean(values) if len(values) else np.nan

85 

86 

class StdevAction(ScalarFromVectorAction):
    """Standard deviation of the masked vector, computed with ``nanStd``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanStd`` of the vector entries selected by the mask.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanStd(column[selection])

93 

94 

class ValueAction(ScalarFromVectorAction):
    """Returns element 0 of a vector as a float.

    No mask is applied: the element is taken from the vector exactly as it
    is stored under ``vectorKey``.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``float(vector[0])`` for the vector named by ``vectorKey``."""
        column = data[self.vectorKey.format(**kwargs)]
        first = column[0]
        return cast(Scalar, float(first))

100 

101 

class SigmaMadAction(ScalarFromVectorAction):
    """Sigma MAD of the masked vector, computed with ``nanSigmaMad``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanSigmaMad`` of the vector entries selected by the mask.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanSigmaMad(column[selection])

108 

109 

class CountAction(ScalarAction):
    """Performs count actions, with threshold-based filtering.

    The operator is specified as a string, for example, "lt", "le", "ge",
    "gt", "ne", and "eq" for the mathematical operations <, <=, >=, >, !=,
    and == respectively. To count non-NaN values, only pass the column name
    as vector key (the defaults are op="ne" and threshold=nan). To count NaN
    values, pass op="eq" and threshold = nan (from math.nan).
    Optionally to configure from a YAML file, pass "threshold: !!float nan".
    To compute the number of elements with values less than or equal to a
    given threshold, use op="le"; use op="lt" for strictly less than.
    """

    vectorKey = Field[str]("Key of Vector to count")
    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "ne": "not equal to a given value",
            "eq": "equal to a given value",
            "gt": "greater than threshold",
        },
        default="ne",
    )
    threshold = Field[float](doc="Threshold to apply.", default=nan)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single ``(vectorKey, Vector)`` entry."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the masked vector entries that satisfy ``op``/``threshold``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            With a NaN threshold: the number of NaN entries (op="eq") or of
            non-NaN entries (op="ne"). Otherwise the number of non-NaN
            entries for which ``entry <op> threshold`` holds.

        Raises
        ------
        ValueError
            Raised if the threshold is NaN and ``op`` is neither "eq" nor
            "ne".
        """
        mask = self.getMask(**kwargs)
        arr = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        is_nan = np.isnan(arr)

        threshold = self.threshold
        # NaN never compares equal to anything, itself included, so the
        # NaN-counting mode has to be detected with isnan; ``threshold == nan``
        # is always False. The None check keeps an unset threshold on the
        # ordinary comparison path instead of failing inside isnan.
        if threshold is not None and np.isnan(threshold):
            if self.op == "eq":
                # Count number of NaNs
                return cast(Scalar, int(is_nan.sum()))
            if self.op == "ne":
                # Count number of non-NaNs
                return cast(Scalar, int(len(arr) - is_nan.sum()))
            raise ValueError("Invalid operator for counting NaNs.")

        # Count for given threshold ignoring all NaNs
        compare = getattr(operator, self.op)
        return cast(Scalar, int(np.sum(compare(arr[~is_nan], threshold))))

163 

164 

class CountUniqueAction(ScalarFromVectorAction):
    """Counts the distinct values in the masked vector."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the length of ``np.unique`` applied to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))

173 

174 

class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the upper tail of the sorted input.

    The masked vector is sorted in ascending order and the final
    ``len(vector) // 10`` entries, i.e. the largest tenth of the values, are
    passed to ``nanMedian``. When the vector has fewer than ten entries that
    count is zero and the whole vector is used instead.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMedian`` of the largest tenth of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][selection])  # type: ignore
        tail_size = len(ordered) // 10
        # A tail_size of 0 turns the slice into ``ordered[0:]``, which is the
        # complete array rather than an empty one.
        tail = ordered[-tail_size:]
        return nanMedian(tail)

183 

184 

class FracThreshold(ScalarFromVectorAction):
    """Fraction of the non-NaN entries that satisfy a threshold comparison.

    ``op`` names the comparison: "lt", "le", "ge" and "gt" stand for
    <, <=, >= and > respectively. For example, op="le" gives the fraction of
    entries less than or equal to the threshold.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of entries passing the test.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Number of non-NaN masked entries satisfying the comparison,
            divided by the number of non-NaN masked entries, multiplied by
            100 if ``percent`` is set. NaN when no non-NaN entries remain.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        masked = column[selection]  # type: ignore
        kept = masked[~np.isnan(masked)]
        count = len(kept)
        if count == 0:
            return np.nan

        cutoff = self.threshold
        # With relative_to_median set the cutoff becomes median + threshold,
        # provided the median itself is finite.
        if self.relative_to_median:
            median = nanMedian(kept)
            if np.isfinite(median):
                cutoff += median

        compare = getattr(operator, self.op)
        fraction = cast(Scalar, float(np.sum(compare(kept, cutoff)) / count))  # type: ignore
        return 100.0 * fraction if self.percent else fraction

229 

230 

class MaxAction(ScalarFromVectorAction):
    """Maximum of the masked vector, computed with ``nanMax``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMax`` of the vector entries selected by the mask.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMax(column[selection])

237 

238 

class MinAction(ScalarFromVectorAction):
    """Minimum of the masked vector, computed with ``nanMin``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMin`` of the vector entries selected by the mask.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMin(column[selection])

245 

246 

class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    An entry is counted when ``minimum <= entry < maximum``. The denominator
    is the number of masked entries including NaNs, so NaN entries lower the
    fraction.
    """

    # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` is valid in every NumPy
    # release. The defaults are the largest and smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of entries inside the range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Count of non-NaN entries in ``[minimum, maximum)`` divided by the
            number of masked entries, multiplied by 100 if ``percent`` is
            set. NaN when the mask selects no entries.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid a ZeroDivisionError on an empty selection; NaN matches
            # what FracThreshold returns in the same situation.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

270 

271 

class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN."""

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction (or percentage) of masked entries that are NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Number of NaN entries divided by the number of masked entries,
            multiplied by 100 if ``percent`` is set. NaN when the mask
            selects no entries.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid a ZeroDivisionError on an empty selection; NaN matches
            # what FracThreshold returns in the same situation.
            return np.nan
        n_nan = int(np.isnan(values).sum())
        result = cast(Scalar, float(n_nan / nvalues))
        return 100.0 * result if self.percent else result

290 

291 

class SumAction(ScalarFromVectorAction):
    """Sum of the masked vector, computed with ``np.nansum``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``np.nansum`` of the vector entries selected by the mask.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)

299 

300 

class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        The result is the midpoint of the first bin whose cumulative
        frequency reaches half of the total frequency.

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Median of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # searchsorted needs a sorted array; the running sum is one as long
        # as the frequencies are non-negative (assumed, not checked here).
        median_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 2)
        return bin_mid[median_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram median, or NaN for an empty histogram.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        med : `Scalar`
            ``histMedian`` of the two vectors as a float, or NaN when the
            frequency vector has length zero.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` is valid in
            # every NumPy release.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))

341 

342 

class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Each quartile is taken as the midpoint of the first bin whose
        cumulative frequency reaches one quarter (lower) or three quarters
        (upper) of the total frequency.

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Inter-quartile range of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        # searchsorted needs a sorted array; the running sum is one as long
        # as the frequencies are non-negative (assumed, not checked here).
        liqr_index = np.searchsorted(cumulative_sum, total / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * total)
        return bin_mid[uiqr_index] - bin_mid[liqr_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram IQR, or NaN for an empty histogram.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        iqr : `Scalar`
            ``histIqr`` of the two vectors as a float, or NaN when the
            frequency vector has length zero.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` is valid in
            # every NumPy release.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))

386 

387 

class DivideScalar(ScalarAction):
    """Quotient of two scalar actions: the result of A divided by B."""

    actionA = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar A")
    actionB = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of ``actionA`` followed by ``actionB``."""
        yield from self.actionA.getInputSchema()
        yield from self.actionB.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate both actions on ``data`` and divide A by B.

        Parameters
        ----------
        data : `KeyedData`
            Passed unchanged, together with ``kwargs``, to both actions.

        Returns
        -------
        result : `Scalar`
            The value from ``actionA`` divided by the value from ``actionB``.

        Raises
        ------
        ValueError
            Raised if the value from ``actionB`` equals zero.
        """
        numerator = self.actionA(data, **kwargs)
        denominator = self.actionB(data, **kwargs)
        if denominator == 0:
            raise ValueError("Denominator is zero!")
        return numerator / denominator