Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%

124 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-15 10:10 +0000

1from __future__ import annotations 

2 

3__all__ = ( 

4 "MedianAction", 

5 "MeanAction", 

6 "StdevAction", 

7 "SigmaMadAction", 

8 "CountAction", 

9 "CountUniqueAction", 

10 "ApproxFloor", 

11 "FracThreshold", 

12 "MaxAction", 

13 "MinAction", 

14 "FracInRange", 

15 "FracNan", 

16) 

17 

18import operator 

19from typing import cast 

20 

21import numpy as np 

22from lsst.pex.config import ChoiceField, Field 

23 

24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

25from ...statistics import nansigmaMad 

26 

27 

class MedianAction(ScalarAction):
    """Reduce a vector to its median, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)`` before the median is taken.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        # np.nanmedian skips NaN entries; float() yields a plain scalar.
        median = float(np.nanmedian(vector[selection]))
        return cast(Scalar, median)

39 

40 

class MeanAction(ScalarAction):
    """Reduce a vector to its arithmetic mean, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)`` before the mean is taken.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        selected = column[selection]
        # np.nanmean skips NaN entries; float() yields a plain scalar.
        return cast(Scalar, float(np.nanmean(selected)))

52 

53 

class StdevAction(ScalarAction):
    """Reduce a vector to its standard deviation, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)``. ``numpy.nanstd`` is called with
    its default arguments.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return tuple([(self.vectorKey, Vector)])

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[selection]
        spread = np.nanstd(selected)
        return cast(Scalar, float(spread))

65 

66 

class SigmaMadAction(ScalarAction):
    """Reduce a vector to a scalar with ``nansigmaMad``.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)``; the result of ``nansigmaMad`` on
    that selection is returned as a float.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = data[key][selection]  # type: ignore
        sigmaMad = float(nansigmaMad(selected))
        return cast(Scalar, sigmaMad)

85 

86 

class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)`` before NaN entries are dropped.
    """

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[selection]
        # Keep only the entries for which isnan is False, then count them.
        isValid = np.logical_not(np.isnan(selected))
        nValid = len(selected[isValid])
        return cast(Scalar, nValid)

100 

101 

class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a column.

    Parameters
    ----------
    data : `KeyedData`
        Mapping from which the column named by ``vectorKey`` is read.

    Returns
    -------
    count : `Scalar`
        The number of distinct values, as reported by ``numpy.unique``,
        among the entries selected by the mask from ``self.getMask``.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))

125 

126 

class ApproxFloor(ScalarAction):
    """Returns the median of the highest tenth of the sorted input.

    The selected values are sorted in ascending order and the NaN-ignoring
    median of the last ``len(values) // 10`` entries is returned.

    Notes
    -----
    If fewer than ten values are selected the tail would be empty, so the
    median of all selected values is returned instead.

    ``numpy.sort`` places NaN entries at the end of a float array, so NaNs
    take up slots in the tail and are then ignored by ``numpy.nanmedian``.

    NOTE(review): an earlier docstring described this as "the median of the
    lowest ten values"; the implementation has always used the upper tail.
    Confirm which one is intended before changing the slice.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the top tenth of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            NaN-ignoring median of the last ``len(values) // 10`` sorted
            values, or of every selected value when that length is zero.
        """
        mask = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        tailLength = len(ordered) // 10
        # ``ordered[-0:]`` is the whole array, not an empty slice; spell the
        # fallback out so the small-sample behaviour is visible.
        tail = ordered[-tailLength:] if tailLength > 0 else ordered
        return cast(Scalar, float(np.nanmedian(tail)))

140 

141 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold. The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than a given threshold,
    use op="lt".

    NaN entries are removed before the comparison, so the fraction is
    relative to the number of non-NaN selected values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the column named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage, if ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds. NaN is
            returned when no non-NaN values are selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        nValues = len(values)
        if nValues == 0:
            # Nothing to compare: report NaN explicitly instead of relying
            # on a 0/0 division (which also emits a RuntimeWarning).
            return cast(Scalar, float("nan"))
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, self.threshold)) / nValues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

179 

180 

class MaxAction(ScalarAction):
    """Returns the maximum of the given data, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)``. NaN entries are dropped before
    the maximum is taken, matching the NaN-ignoring median, mean and
    standard deviation actions in this module.
    """

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the largest non-NaN selected value.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The maximum of the non-NaN selected values, or NaN when there
            are none.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        valid = values[np.logical_not(np.isnan(values))]
        if len(valid) == 0:
            # np.max raises ValueError on an empty array; report NaN instead.
            return cast(Scalar, float("nan"))
        return cast(Scalar, float(np.max(valid)))

192 

193 

class MinAction(ScalarAction):
    """Returns the minimum of the given data, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``self.getMask(**kwargs)``. NaN entries are dropped before
    the minimum is taken, matching the NaN-ignoring median, mean and
    standard deviation actions in this module.
    """

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the smallest non-NaN selected value.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The minimum of the non-NaN selected values, or NaN when there
            are none.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        valid = values[np.logical_not(np.isnan(values))]
        if len(valid) == 0:
            # np.min raises ValueError on an empty array; report NaN instead.
            return cast(Scalar, float("nan"))
        return cast(Scalar, float(np.min(valid)))

205 

206 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of
    selected entries including NaNs, so NaN entries lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # The defaults are the largest-magnitude finite floats. ``np.inf`` is
    # used because the ``np.Inf`` alias was removed in NumPy 2.0.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the column named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Count before dropping NaNs: the fraction is relative to all
        # selected rows.
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

246 

247 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN.

    The fraction is relative to the number of entries selected by the mask
    returned from ``self.getMask(**kwargs)``.
    """

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        # One required input: the configured key, typed as a Vector.
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the column named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection.
            return cast(Scalar, float("nan"))
        nanValues = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(nanValues) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result