Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%

124 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-21 10:05 +0000

1from __future__ import annotations 

2 

# Public API of this module: the scalar actions exported by ``import *``.
__all__ = (
    "MedianAction",
    "MeanAction",
    "StdevAction",
    "SigmaMadAction",
    "CountAction",
    "CountUniqueAction",
    "ApproxFloor",
    "FracThreshold",
    "MaxAction",
    "MinAction",
    "FracInRange",
    "FracNan",
)

17 

18import operator 

19from typing import cast 

20 

21import numpy as np 

22from lsst.pex.config import ChoiceField, Field 

23 

24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

25from ...statistics import nansigmaMad 

26 

27 

class MedianAction(ScalarAction):
    """Calculate the NaN-ignoring median of a single vector."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        median : `Scalar`
            ``numpy.nanmedian`` of the masked vector, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        median = np.nanmedian(vector[selection])
        return cast(Scalar, float(median))

37 

38 

class MeanAction(ScalarAction):
    """Calculate the NaN-ignoring mean of a single vector."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the mean of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            ``numpy.nanmean`` of the masked vector, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        mean = np.nanmean(vector[selection])
        return cast(Scalar, float(mean))

48 

49 

class StdevAction(ScalarAction):
    """Calculate the NaN-ignoring standard deviation of a single vector."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the standard deviation of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        stdev : `Scalar`
            ``numpy.nanstd`` of the masked vector (NumPy's default
            arguments), converted to `float`.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        stdev = np.nanstd(vector[selection])
        return cast(Scalar, float(stdev))

59 

60 

class SigmaMadAction(ScalarAction):
    """Apply ``nansigmaMad`` to a single vector and return the result."""

    # NOTE(review): this doc text is identical to MedianAction's and looks
    # copy-pasted; consider rewording it to describe the sigma-MAD statistic.
    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate ``nansigmaMad`` on the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        sigmaMad : `Scalar`
            The value returned by ``nansigmaMad`` for the masked vector,
            converted to `float`.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = data[key][selection]  # type: ignore
        return cast(Scalar, float(nansigmaMad(selected)))

77 

78 

class CountAction(ScalarAction):
    """Count the entries of a single vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the selected entries that are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which ``numpy.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[selection]
        # Drop NaN entries before counting.
        not_nan = selected[~np.isnan(selected)]
        return cast(Scalar, len(not_nan))

90 

91 

class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a given column.

    Parameters
    ----------
    data : `KeyedData`
        Container holding the column named by ``vectorKey``.

    Returns
    -------
    count : `Scalar`
        The number of distinct values in the masked column.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the distinct values among the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the column; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Length of ``numpy.unique`` applied to the masked column.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[key])[selection]
        distinct = np.unique(selected)
        return cast(Scalar, len(distinct))

115 

116 

class ApproxFloor(ScalarAction):
    """Median of the trailing tenth of the ascending-sorted vector.

    The masked vector is sorted with ``numpy.sort`` and the NaN-ignoring
    median of its last ``len // 10`` entries is returned.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the last tenth of the sorted entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        floor : `Scalar`
            ``numpy.nanmedian`` of the trailing slice, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        ordered = np.sort(data[key][selection])  # type: ignore
        tail_length = len(ordered) // 10
        # With fewer than 10 entries ``tail_length`` is 0 and ``ordered[-0:]``
        # is the whole array, so the median of every entry is returned.
        # NOTE(review): numpy.sort places NaNs at the end, so NaN entries land
        # in this trailing slice first -- confirm that is intended.
        tail = ordered[-tail_length:]
        return cast(Scalar, float(np.nanmedian(tail)))

128 

129 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that satisfies a comparison
    against a specified threshold.

    The comparison is chosen by name through ``op``: "lt", "le", "ge" and
    "gt" select the mathematical operations <, <=, >= and >. For example,
    use op="lt" for the fraction of elements strictly less than the
    threshold, or op="le" to also include elements equal to it. NaN entries
    are discarded before the fraction is formed, so they count in neither
    the numerator nor the denominator.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN rows passing the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the column; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction of non-NaN rows for which ``value <op> threshold``
            holds, multiplied by 100 when ``percent`` is set.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        column = column[selection]  # type: ignore
        # Only non-NaN entries take part in the comparison and the total.
        is_nan = np.isnan(column)
        column = column[np.logical_not(is_nan)]
        # ``op`` names a function in the standard ``operator`` module.
        compare = getattr(operator, self.op)
        n_passing = np.sum(compare(column, self.threshold))
        fraction = cast(Scalar, float(n_passing / len(column)))  # type: ignore
        return 100.0 * fraction if self.percent else fraction

167 

168 

class MaxAction(ScalarAction):
    """Find the maximum of a single vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the maximum of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        maximum : `Scalar`
            ``numpy.nanmax`` of the masked vector, converted to `float`.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Use the NaN-ignoring reduction so a single NaN does not turn the
        # result into NaN, matching the median/mean/stdev actions here.
        return cast(Scalar, float(np.nanmax(values)))

178 

179 

class MinAction(ScalarAction):
    """Find the minimum of a single vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the minimum of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        minimum : `Scalar`
            ``numpy.nanmin`` of the masked vector, converted to `float`.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Use the NaN-ignoring reduction so a single NaN does not turn the
        # result into NaN, matching the median/mean/stdev actions here.
        return cast(Scalar, float(np.nanmin(values)))

189 

190 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value is counted when
    ``minimum <= value < maximum``. The denominator is the number of masked
    rows including NaN rows, so NaN entries lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` is used rather than the ``np.Inf`` alias, which NumPy 2.0
    # removed; the defaults are the largest and smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the column; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range, or NaN when no rows survive the mask.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction is undefined without rows; return NaN rather than
            # letting the integer division raise ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result

230 

231 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the data this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the column; it is looked up under
            ``vectorKey`` after that key is formatted with ``kwargs``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values, or NaN
            when no rows survive the mask.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction is undefined without rows; return NaN rather than
            # letting the integer division raise ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result