Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 45%

140 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-30 14:27 +0000

1from __future__ import annotations 

2 

# Public scalar actions exported by this module.
__all__ = (
    "MedianAction", "MeanAction", "StdevAction", "ValueAction",
    "SigmaMadAction", "CountAction", "CountUniqueAction", "ApproxFloor",
    "FracThreshold", "MaxAction", "MinAction", "FracInRange",
    "FracNan",
)

18 

19import operator 

20from typing import cast 

21 

22import numpy as np 

23from lsst.pex.config import ChoiceField, Field 

24 

25from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

26from ...statistics import nansigmaMad 

27 

28 

class MedianAction(ScalarAction):
    """Calculates the median of the given data.

    NaN entries are ignored (``np.nanmedian``). If no entries remain after
    the mask from ``getMask`` is applied, NaN is returned.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        # Look the vector up and apply the mask once, then reuse the result.
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmedian(values)))

45 

46 

class MeanAction(ScalarAction):
    """Calculates the mean of the given data.

    NaN entries are ignored (``np.nanmean``). If no entries remain after
    the mask from ``getMask`` is applied, NaN is returned.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        # Look the vector up and apply the mask once, then reuse the result.
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmean(values)))

63 

64 

class StdevAction(ScalarAction):
    """Compute the standard deviation of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        spread = np.nanstd(column[selection])
        return cast(Scalar, float(spread))

76 

77 

class ValueAction(ScalarAction):
    """Return the first element of a vector as a float."""

    vectorKey = Field[str]("Key of Vector from which to extract the first value")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        column = data[self.vectorKey.format(**kwargs)]
        first = column[0]
        return cast(Scalar, float(first))

88 

89 

class SigmaMadAction(ScalarAction):
    """Compute the sigma MAD of a vector using ``nansigmaMad``."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        selected = data[self.vectorKey.format(**kwargs)][selection]  # type: ignore
        spread = nansigmaMad(selected)
        return cast(Scalar, float(spread))

108 

109 

class CountAction(ScalarAction):
    """Count the entries of a column that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        # Drop NaN entries, then report how many remain.
        is_nan = np.isnan(selected)
        kept = selected[~is_nan]
        return cast(Scalar, len(kept))

123 

124 

class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a given column.

    Parameters
    ----------
    data : `KeyedData`

    Returns
    -------
    count : `Scalar`
        The number of distinct values in the selected column entries.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        distinct = np.unique(selected)
        return cast(Scalar, len(distinct))

148 

149 

class ApproxFloor(ScalarAction):
    """Returns the median of the highest tenth of the sorted input.

    The masked vector is sorted in ascending order and the NaN-ignoring
    median of its last ``len // 10`` entries is returned. When fewer than
    ten entries are selected, the median of all selected entries is
    returned instead.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        value = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        x = len(value) // 10
        # With x == 0 the slice value[-0:] would select the whole array;
        # spell that fallback out instead of relying on the slicing quirk.
        tail = value[-x:] if x > 0 else value
        return cast(Scalar, float(np.nanmedian(tail)))

163 

164 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold. The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than a given threshold,
    use op="lt" (or op="le" to also include values equal to the threshold).

    NaN entries are removed before the fraction is computed, so the
    denominator is the number of non-NaN selected entries. If none remain,
    NaN is returned.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        if len(values) == 0:
            # Nothing to compare: return NaN directly rather than reaching it
            # through a 0/0 division and a median of an empty array.
            return cast(Scalar, np.nan)
        # If relative_to_median is set, shift the threshold to be median+thresh
        if self.relative_to_median:
            threshold = self.threshold + np.median(values)
        else:
            threshold = self.threshold
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / len(values)),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

208 

209 

class MaxAction(ScalarAction):
    """Return the largest value of a vector (via ``np.max``)."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        largest = np.max(column[selection])
        return cast(Scalar, float(largest))

221 

222 

class MinAction(ScalarAction):
    """Return the smallest value of a vector (via ``np.min``)."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        smallest = np.min(column[selection])
        return cast(Scalar, float(smallest))

234 

235 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of
    selected entries including NaNs.
    """

    vectorKey = Field[str](doc="Name of column")
    # np.inf rather than the np.Inf alias, which was removed in NumPy 2.0.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN if no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The division below is a plain Python int division and would
            # raise ZeroDivisionError; the fraction is undefined, so give NaN.
            return cast(Scalar, np.nan)
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

275 

276 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN if no
            rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The division below is a plain Python int division and would
            # raise ZeroDivisionError; the fraction is undefined, so give NaN.
            return cast(Scalar, np.nan)
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result