Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 47%

128 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-07 11:44 +0000

1from __future__ import annotations 

2 

# Names exported by ``from <module> import *``; one entry per action class
# defined in this module.
__all__ = (
    # Central tendency and spread
    "MedianAction", "MeanAction", "StdevAction", "SigmaMadAction",
    # Counting
    "CountAction", "CountUniqueAction",
    # Tails, thresholds and extrema
    "ApproxFloor", "FracThreshold", "MaxAction", "MinAction",
    # Fractions
    "FracInRange", "FracNan",
)

17 

18import operator 

19from typing import cast 

20 

21import numpy as np 

22from lsst.pex.config import ChoiceField, Field 

23 

24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

25from ...statistics import nansigmaMad 

26 

27 

class MedianAction(ScalarAction):
    """Reduce a vector to its median, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.nanmedian`` of the masked vector, as a Python float.
        """
        mask = self.getMask(**kwargs)
        # The configured key is a format template filled in from ``kwargs``.
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        median = np.nanmedian(selected)
        return cast(Scalar, float(median))

39 

40 

class MeanAction(ScalarAction):
    """Reduce a vector to its arithmetic mean, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring mean of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.nanmean`` of the masked vector, as a Python float.
        """
        mask = self.getMask(**kwargs)
        # The configured key is a format template filled in from ``kwargs``.
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        mean = np.nanmean(selected)
        return cast(Scalar, float(mean))

52 

53 

class StdevAction(ScalarAction):
    """Reduce a vector to its standard deviation, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring standard deviation of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.nanstd`` of the masked vector (numpy's default ``ddof``), as
            a Python float.
        """
        mask = self.getMask(**kwargs)
        # The configured key is a format template filled in from ``kwargs``.
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        stdev = np.nanstd(selected)
        return cast(Scalar, float(stdev))

65 

66 

class SigmaMadAction(ScalarAction):
    """Reduce a vector to its sigma-MAD as computed by ``nansigmaMad``."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nansigmaMad`` of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            The value returned by ``nansigmaMad`` for the masked vector,
            converted to a Python float.
        """
        mask = self.getMask(**kwargs)
        # The configured key is a format template filled in from ``kwargs``.
        key = self.vectorKey.format(**kwargs)
        selected = data[key][mask]  # type: ignore
        sigma_mad = nansigmaMad(selected)
        return cast(Scalar, float(sigma_mad))

85 

86 

class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many selected entries are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which ``np.isnan`` is false.
        """
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        # Drop NaN entries before counting.
        not_nan = selected[~np.isnan(selected)]
        return cast(Scalar, len(not_nan))

100 

101 

class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a given column.

    Parameters
    ----------
    data : `KeyedData`
        Mapping that holds the column under the formatted ``vectorKey``.

    Returns
    -------
    count : `Scalar`
        The number of distinct values among the selected rows, i.e. the
        length of ``np.unique`` applied to the masked column.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of distinct values among the selected rows."""
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        distinct = np.unique(selected)
        return cast(Scalar, len(distinct))

125 

126 

class ApproxFloor(ScalarAction):
    """Returns the median of the upper tail of the sorted input.

    The selected vector is sorted in ascending order and the median is taken
    over its last ``len(vector) // 10`` elements, i.e. roughly the largest
    tenth of the values.

    Notes
    -----
    When fewer than ten values are selected, ``len(vector) // 10`` is zero
    and the slice ``[-0:]`` spans the whole vector, so the median of all
    selected values is returned instead.

    ``np.sort`` places NaN entries at the end of the array, so they fall
    inside the tail that is examined; ``np.nanmedian`` then ignores them.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the top tenth of the sorted data.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.nanmedian`` of the last ``len // 10`` sorted values, as a
            Python float.
        """
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        ordered = np.sort(data[key][mask])  # type: ignore
        tail_length = len(ordered) // 10
        # NOTE: a tail length of zero makes this slice the entire array.
        tail = ordered[-tail_length:]
        return cast(Scalar, float(np.nanmedian(tail)))

140 

141 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold. The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values strictly less than a given
    threshold, use op="lt"; use op="le" to also count values equal to the
    threshold.

    NaN entries are removed before the comparison, so the fraction is
    relative to the number of non-NaN selected values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage, when ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds. NaN is
            returned when no non-NaN values are selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_valid = len(values)
        if n_valid == 0:
            # Nothing to compare: the fraction is undefined. Returning NaN
            # explicitly avoids relying on numpy's 0/0 warning path.
            return cast(Scalar, float("nan"))
        threshold = self.threshold
        if self.relative_to_median:
            # Shift the threshold so that it is measured from the median.
            threshold = threshold + np.median(values)
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / n_valid),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result

185 

186 

class MaxAction(ScalarAction):
    """Reduce a vector to its largest value.

    The reduction uses ``np.max``, which does not skip NaN entries.
    """

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the maximum of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.max`` of the masked vector, as a Python float.
        """
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        largest = np.max(selected)
        return cast(Scalar, float(largest))

198 

199 

class MinAction(ScalarAction):
    """Reduce a vector to its smallest value.

    The reduction uses ``np.min``, which does not skip NaN entries.
    """

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the minimum of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            ``np.min`` of the masked vector, as a Python float.
        """
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[mask]
        smallest = np.min(selected)
        return cast(Scalar, float(smallest))

211 

212 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of selected
    rows *including* NaN entries, so NaN rows lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` is used instead of the ``np.Inf`` alias, which was removed in
    # NumPy 2.0. The defaults are the largest and smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # An empty selection has no defined fraction; report NaN rather
            # than failing with ZeroDivisionError on the division below.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result

252 

253 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Declare the single `Vector` column this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Used both to fill in the ``vectorKey`` template and to obtain the
            mask from ``self.getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # An empty selection has no defined fraction; report NaN rather
            # than failing with ZeroDivisionError on the division below.
            return cast(Scalar, float("nan"))
        n_nan = len(values[np.isnan(values)])
        result = cast(
            Scalar,
            float(n_nan / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result