Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 45%

148 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 13:17 +0000

1from __future__ import annotations 

2 

# Names exported by ``from <this module> import *``; one entry per scalar
# action class defined in this module.
__all__ = (
    "MedianAction",
    "MeanAction",
    "StdevAction",
    "ValueAction",
    "SigmaMadAction",
    "CountAction",
    "CountUniqueAction",
    "ApproxFloor",
    "FracThreshold",
    "MaxAction",
    "MinAction",
    "FracInRange",
    "FracNan",
    "SumAction",
)

19 

20import operator 

21from typing import cast 

22 

23import numpy as np 

24from lsst.pex.config import ChoiceField, Field 

25 

26from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

27from ...statistics import nansigmaMad 

28 

29 

class MedianAction(ScalarAction):
    """Calculates the median of the given data.

    NaN entries are ignored (``np.nanmedian``). When the mask selects no
    elements the result is NaN.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        med : `Scalar`
            NaN-ignoring median of the selected elements, or NaN if no
            element is selected.
        """
        mask = self.getMask(**kwargs)
        # Look up and mask the vector once instead of once per branch.
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
            # removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmedian(selected)))

46 

47 

class MeanAction(ScalarAction):
    """Calculates the mean of the given data.

    NaN entries are ignored (``np.nanmean``). When the mask selects no
    elements the result is NaN.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the mean of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        mean : `Scalar`
            NaN-ignoring mean of the selected elements, or NaN if no element
            is selected.
        """
        mask = self.getMask(**kwargs)
        # Look up and mask the vector once instead of once per branch.
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
            # removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmean(selected)))

64 

65 

class StdevAction(ScalarAction):
    """Compute the standard deviation of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``np.nanstd`` of the masked vector as a Python float.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        stdev : `Scalar`
            Standard deviation of the selected elements.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        stdev = float(np.nanstd(column[selection]))
        return cast(Scalar, stdev)

77 

78 

class ValueAction(ScalarAction):
    """Return the first element of a vector as a float.

    No mask is applied by this action; element 0 of the stored vector is
    used directly.
    """

    vectorKey = Field[str]("Key of Vector from which to extract the first value")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Extract the first value of the vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template.

        Returns
        -------
        value : `Scalar`
            Element 0 of the vector, converted to ``float``.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = column[0]
        return cast(Scalar, float(first))

89 

90 

class SigmaMadAction(ScalarAction):
    """Compute the sigma MAD of a vector using ``nansigmaMad``."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nansigmaMad`` of the masked vector as a Python float.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        sigmaMad : `Scalar`
            Result of ``nansigmaMad`` on the selected elements.
        """
        selection = self.getMask(**kwargs)
        selected = data[self.vectorKey.format(**kwargs)][selection]  # type: ignore
        sigma = float(nansigmaMad(selected))
        return cast(Scalar, sigma)

109 

110 

class CountAction(ScalarAction):
    """Count the entries of a column that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many masked elements are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        count : `Scalar`
            Number of selected elements for which ``np.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_nan = np.isnan(selected)
        kept = selected[~is_nan]
        return cast(Scalar, len(kept))

124 

125 

class CountUniqueAction(ScalarAction):
    """Count the distinct values present in a column."""

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of distinct values among the masked elements.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the selected elements.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        distinct = np.unique(selected)
        return cast(Scalar, len(distinct))

149 

150 

class ApproxFloor(ScalarAction):
    """Returns the median of the upper tenth of the sorted input.

    The masked vector is sorted in ascending order and the NaN-ignoring
    median of its last ``len // 10`` elements (i.e. the largest values) is
    returned. When fewer than ten elements are selected the median of all
    of them is returned instead.

    Notes
    -----
    ``np.sort`` places NaN entries at the end of the array, so they fall
    inside the tail that is examined; ``np.nanmedian`` then ignores them.
    NOTE(review): the class name and the previous docstring ("lowest ten
    values") suggest a different intent than what the code computes --
    confirm with callers before changing the behaviour.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the largest tenth of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        value : `Scalar`
            NaN-ignoring median of the last ``len // 10`` sorted elements,
            or of all selected elements when there are fewer than ten.
        """
        mask = self.getMask(**kwargs)
        value = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        x = len(value) // 10
        # ``value[-0:]`` is the whole array, so short inputs have always
        # fallen back to the full vector; spell that out explicitly.
        tail = value[-x:] if x > 0 else value
        return cast(Scalar, float(np.nanmedian(tail)))

164 

165 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The operator is specified as a string: "lt", "le", "ge", "gt" for the
    mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than a given threshold,
    use op="lt"; use op="le" to also count values equal to the threshold.

    NaN entries are discarded before the fraction is computed, so the
    denominator is the number of non-NaN selected elements.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage, if ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds. NaN
            if no non-NaN value is selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            # Nothing to measure: return NaN directly rather than relying on
            # a 0/0 division (and a median of an empty array) that only
            # yields NaN through RuntimeWarnings.
            return cast(Scalar, np.nan)
        # If relative_to_median is set, shift the threshold to be median+thresh
        if self.relative_to_median:
            threshold = self.threshold + np.median(values)
        else:
            threshold = self.threshold
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / n_values),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result

209 

210 

class MaxAction(ScalarAction):
    """Returns the maximum of the given data.

    ``np.max`` is used, so a NaN among the selected elements propagates to
    the result. When the mask selects no elements the result is NaN.
    """

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the maximum of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        maximum : `Scalar`
            Largest selected element, or NaN if no element is selected.
        """
        mask = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # ``np.max`` raises ValueError on an empty array; return NaN
            # instead, as MedianAction and MeanAction do for empty input.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.max(selected)))

222 

223 

class MinAction(ScalarAction):
    """Returns the minimum of the given data.

    ``np.min`` is used, so a NaN among the selected elements propagates to
    the result. When the mask selects no elements the result is NaN.
    """

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the minimum of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        minimum : `Scalar`
            Smallest selected element, or NaN if no element is selected.
        """
        mask = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # ``np.min`` raises ValueError on an empty array; return NaN
            # instead, as MedianAction and MeanAction do for empty input.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.min(selected)))

235 

236 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of
    selected elements *including* NaN entries. The default bounds are the
    most negative and the largest finite floats.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` rather than ``np.Inf``: the capitalised alias was removed in
    # NumPy 2.0 and would make this class body fail at import time.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN if the mask selects no rows.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError: the fraction of an empty selection is
            # undefined, reported as NaN.
            return cast(Scalar, np.nan)
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result

276 

277 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN if
            the mask selects no rows.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError: the fraction of an empty selection is
            # undefined, reported as NaN.
            return cast(Scalar, np.nan)
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result

311 

312 

class SumAction(ScalarAction):
    """Sum the values of a column, treating NaN entries as zero."""

    vectorKey = Field[str]("Key of Vector to sum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema entry for the single vector this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``np.nansum`` of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is looked up, using ``vectorKey``
            formatted with ``kwargs``.
        **kwargs
            Used to fill the ``vectorKey`` template and forwarded to
            ``getMask``.

        Returns
        -------
        total : `Scalar`
            The value returned by ``np.nansum``; it is not converted to a
            Python ``float``.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        total = np.nansum(selected)
        return cast(Scalar, total)