Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 36%

150 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-05 14:05 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "MedianAction", 

26 "MeanAction", 

27 "StdevAction", 

28 "ValueAction", 

29 "SigmaMadAction", 

30 "CountAction", 

31 "CountUniqueAction", 

32 "ApproxFloor", 

33 "FracThreshold", 

34 "MaxAction", 

35 "MinAction", 

36 "FracInRange", 

37 "FracNan", 

38 "SumAction", 

39 "MedianHistAction", 

40 "IqrHistAction", 

41) 

42 

43import operator 

44from typing import cast 

45 

46import numpy as np 

47from lsst.pex.config import ChoiceField, Field 

48 

49from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

50from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd 

51 

52 

class ScalarFromVectorAction(ScalarAction):
    """Base class for actions that reduce a single vector to one scalar.

    The vector is looked up in the input data under ``vectorKey``.
    """

    vectorKey = Field[str]("Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        required = (self.vectorKey, Vector)
        return (required,)

60 

61 

class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    NaN is returned when the masked vector is empty.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data; the vector is read from the entry named by
            ``vectorKey`` after formatting it with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        median : `Scalar`
            Result of ``nanMedian`` on the masked values, or NaN if there
            are no values.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input branch no longer raises.
        return nanMedian(values) if len(values) else np.nan

71 

72 

class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    NaN is returned when the masked vector is empty.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the mean of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data; the vector is read from the entry named by
            ``vectorKey`` after formatting it with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            Result of ``nanMean`` on the masked values, or NaN if there are
            no values.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input branch no longer raises.
        return nanMean(values) if len(values) else np.nan

82 

83 

class StdevAction(ScalarFromVectorAction):
    """Calculates the standard deviation of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanStd`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        stdev : `Scalar`
            Whatever ``nanStd`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanStd(column[selection])

90 

91 

class ValueAction(ScalarFromVectorAction):
    """Extracts the first value from a vector."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return element zero of the vector as a float.

        No mask is applied by this action.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        value : `Scalar`
            ``float`` conversion of the first element.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = float(column[0])
        return cast(Scalar, first)

97 

98 

class SigmaMadAction(ScalarFromVectorAction):
    """Calculates the sigma mad of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanSigmaMad`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        sigmaMad : `Scalar`
            Whatever ``nanSigmaMad`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanSigmaMad(column[selection])

105 

106 

class CountAction(ScalarFromVectorAction):
    """Returns the number of non-NaN entries in the given column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the masked entries that are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of selected entries for which ``np.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        not_nan = np.logical_not(np.isnan(column))
        return cast(Scalar, len(column[not_nan]))

115 

116 

class CountUniqueAction(ScalarFromVectorAction):
    """Counts the number of unique rows in a given column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the distinct values among the masked entries.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        distinct = np.unique(column)
        return cast(Scalar, len(distinct))

125 

126 

class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the last tenth of the ascending-sorted input.

    The masked vector is sorted with ``np.sort`` and the final
    ``len // 10`` elements (the largest values) are passed to ``nanMedian``.
    When fewer than ten values are selected the slice length is zero and the
    slice ``[-0:]`` covers the whole vector, so the median of all selected
    values is returned instead.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the top tenth of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        floor : `Scalar`
            Result of ``nanMedian`` on the selected tail of the sorted
            values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        ordered = np.sort(column[selection])  # type: ignore
        tail_length = len(ordered) // 10
        # NOTE(review): a tail_length of 0 selects the entire array here;
        # confirm that fallback is intended for short inputs.
        tail = ordered[-tail_length:]
        return nanMedian(tail)

135 

136 

class FracThreshold(ScalarFromVectorAction):
    """Compute the fraction of a distribution above or below a threshold.

    The comparison is selected by name: "lt", "le", "ge", "gt" correspond to
    the mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than the threshold use
    op="lt"; use op="le" to also count elements equal to the threshold.

    NaN entries are discarded before the fraction is computed, so the
    denominator is the number of non-NaN masked values. NaN is returned
    when no such values remain.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to " "the median?", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of non-NaN
            masked values for which ``value <op> threshold`` holds; NaN if
            there are no non-NaN values.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            return np.nan
        threshold = self.threshold
        # If relative_to_median is set, shift the threshold to be
        # median+thresh. ``values`` is known to be non-empty at this point.
        if self.relative_to_median:
            offset = nanMedian(values)
            if np.isfinite(offset):
                threshold += offset
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / n_values),  # type: ignore
        )
        return 100.0 * result if self.percent else result

181 

182 

class MaxAction(ScalarFromVectorAction):
    """Returns the maximum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanMax`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        maximum : `Scalar`
            Whatever ``nanMax`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMax(column[selection])

189 

190 

class MinAction(ScalarFromVectorAction):
    """Returns the minimum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanMin`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        minimum : `Scalar`
            Whatever ``nanMin`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMin(column[selection])

197 

198 

class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of masked
    values including NaNs. NaN is returned when the masked vector is empty.
    """

    # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` is the portable
    # spelling. These defaults are evaluated when the class is defined.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the in-range fraction of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of masked
            values that are non-NaN and inside the range; NaN if no values
            are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on empty input.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

222 

223 

class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN.

    NaN is returned when the masked vector is empty.
    """

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN fraction of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of masked
            values that are NaN; NaN if no values are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on empty input.
            return np.nan
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

242 

243 

class SumAction(ScalarFromVectorAction):
    """Returns the sum of all values in the column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Sum the masked vector with ``np.nansum``.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vector named by ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        total : `Scalar`
            ``np.nansum`` of the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        total = np.nansum(column)
        return cast(Scalar, total)

251 

252 

class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data.

    NaN is returned when the frequency vector is empty.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the ``(key, type)`` pairs for the two required vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Median of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # First bin whose cumulative count reaches half of the total.
        median_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 2)
        return bin_mid[median_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram median from the keyed input vectors.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        median : `Scalar`
            Midpoint of the median bin as a float, or NaN if the frequency
            vector is empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` is portable.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))

293 

294 

class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data.

    NaN is returned when the frequency vector is empty.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the ``(key, type)`` pairs for the two required vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Inter-quartile range of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        # First bins whose cumulative counts reach 1/4 and 3/4 of the total.
        liqr_index = np.searchsorted(cumulative_sum, total / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * total)
        return bin_mid[uiqr_index] - bin_mid[liqr_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram IQR from the keyed input vectors.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        iqr : `Scalar`
            Difference between the upper- and lower-quartile bin midpoints
            as a float, or NaN if the frequency vector is empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` is portable.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))