Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%

123 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-20 09:05 +0000

1from __future__ import annotations 

2 

3import operator 

4from typing import cast 

5 

6import numpy as np 

7from lsst.pex.config import ChoiceField, Field 

8 

9from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector 

10from ...statistics import nansigmaMad 

11 

12 

class MedianAction(ScalarAction):
    """Reduce a vector to its median, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring median of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanmedian`` of the masked vector, as a Python float.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanmedian(column[selection])))

22 

23 

class MeanAction(ScalarAction):
    """Reduce a vector to its arithmetic mean, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring mean of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanmean`` of the masked vector, as a Python float.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanmean(column[selection])))

33 

34 

class StdevAction(ScalarAction):
    """Reduce a vector to its standard deviation, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring standard deviation of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanstd`` of the masked vector (NumPy's default ``ddof``),
            as a Python float.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanstd(column[selection])))

44 

45 

class SigmaMadAction(ScalarAction):
    """Reduce a vector to the value returned by ``nansigmaMad``.

    ``nansigmaMad`` is imported from the package's ``statistics`` module and
    is not defined in this file; presumably it is a NaN-ignoring sigma-MAD
    estimator -- confirm against that module.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nansigmaMad`` to the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            The ``nansigmaMad`` result converted to a Python float.
        """
        selection = self.getMask(**kwargs)
        selected = data[self.vectorKey.format(**kwargs)][selection]  # type: ignore
        spread = nansigmaMad(selected)
        return cast(Scalar, float(spread))

62 

63 

class CountAction(ScalarAction):
    """Count the non-NaN entries of a vector."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many selected entries are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which ``np.isnan`` is False.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        # Drop NaN entries before counting.
        not_nan = selected[~np.isnan(selected)]
        return cast(Scalar, len(not_nan))

75 

76 

class CountUniqueAction(ScalarAction):
    """Count the distinct values present in a given column."""

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of unique values in the selected column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input column.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the masked column.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        distinct = np.unique(selected)
        return cast(Scalar, len(distinct))

100 

101 

class ApproxFloor(ScalarAction):
    """Median of the tail of the sorted, masked vector.

    The selected values are sorted in ascending order and the NaN-ignoring
    median of the last ``len(values) // 10`` entries is returned.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the last tenth of the sorted vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanmedian`` of the final ``len // 10`` sorted entries, as a
            Python float.
        """
        selection = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][selection])  # type: ignore
        tail_length = len(ordered) // 10
        # NOTE: with fewer than 10 selected values ``tail_length`` is 0 and
        # ``ordered[-0:]`` is the whole array, so the median of every selected
        # value is returned in that case.
        tail = ordered[-tail_length:]
        return cast(Scalar, float(np.nanmedian(tail)))

113 

114 

class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The comparison is selected by name: ``"lt"``, ``"le"``, ``"ge"`` and
    ``"gt"`` map to the operations ``<``, ``<=``, ``>=`` and ``>``. For
    example, to compute the fraction of elements strictly less than the
    threshold use ``op="lt"``; use ``op="le"`` to also count elements equal
    to the threshold.

    NaN entries are removed before the fraction is computed, so the
    denominator is the number of non-NaN selected values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input column.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage, when ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds. NaN is
            returned when no non-NaN values are selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_valid = len(values)
        if n_valid == 0:
            # The fraction is undefined without any valid value; return NaN
            # explicitly instead of relying on a warning-emitting 0/0.
            return cast(Scalar, float("nan"))
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, self.threshold)) / n_valid),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

152 

153 

class MaxAction(ScalarAction):
    """Reduce a vector to its maximum, ignoring NaN entries.

    NaN values are skipped so that this action agrees with the other
    NaN-aware reductions in this module (median, mean, standard deviation).
    """

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring maximum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanmax`` of the masked vector, as a Python float. NaN is
            returned only if every selected entry is NaN.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # np.nanmax rather than np.max: a single NaN must not poison the result.
        return cast(Scalar, float(np.nanmax(values)))

163 

164 

class MinAction(ScalarAction):
    """Reduce a vector to its minimum, ignoring NaN entries.

    NaN values are skipped so that this action agrees with the other
    NaN-aware reductions in this module (median, mean, standard deviation).
    """

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring minimum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input vector.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            ``np.nanmin`` of the masked vector, as a Python float. NaN is
            returned only if every selected entry is NaN.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # np.nanmin rather than np.min: a single NaN must not poison the result.
        return cast(Scalar, float(np.nanmin(values)))

174 

175 

class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the total number of
    selected values, NaN entries included.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` (lower case) is used because the ``np.Inf`` alias was removed
    # in NumPy 2.0. The defaults are the largest and smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input column.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction is undefined for an empty selection; return NaN
            # rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result

215 

216 

class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the single ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the input column.
        **kwargs
            Forwarded to ``self.getMask`` (inherited; not defined in this
            file) and used to fill the ``vectorKey`` format template.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction is undefined for an empty selection; return NaN
            # rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result