Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np 

2from .baseMetric import BaseMetric 

3 

4# A collection of commonly used simple metrics, operating on a single column and returning a float. 

5 

# Names exported by a star-import of this module.
__all__ = [
    'PassMetric',
    'Coaddm5Metric',
    'MaxMetric',
    'AbsMaxMetric',
    'MeanMetric',
    'AbsMeanMetric',
    'MedianMetric',
    'AbsMedianMetric',
    'MinMetric',
    'FullRangeMetric',
    'RmsMetric',
    'SumMetric',
    'CountUniqueMetric',
    'CountMetric',
    'CountRatioMetric',
    'CountSubsetMetric',
    'RobustRmsMetric',
    'MaxPercentMetric',
    'AbsMaxPercentMetric',
    'BinaryMetric',
    'FracAboveMetric',
    'FracBelowMetric',
    'PercentileMetric',
    'NoutliersNsigmaMetric',
    'UniqueRatioMetric',
    'MeanAngleMetric',
    'RmsAngleMetric',
    'FullRangeAngleMetric',
]

# One full turn in radians; the angular helpers in this module use it for wrap-around.
twopi = 2.0 * np.pi

14 

15 

class PassMetric(BaseMetric):
    """Pass-through metric: return the data slice untouched.

    ``cols`` is forwarded to ``BaseMetric`` as ``col`` (``None`` becomes an
    empty list) and the metric dtype is declared as ``'object'``.
    """

    def __init__(self, cols=None, **kwargs):
        requested = [] if cols is None else cols
        super(PassMetric, self).__init__(col=requested, metricDtype='object', **kwargs)

    def run(self, dataSlice, slicePoint=None):
        """Return ``dataSlice`` as-is; ``slicePoint`` is not used."""
        return dataSlice

26 

27 

class Coaddm5Metric(BaseMetric):
    """Compute the coadded m5 value from the individual m5 values in a slice.
    """

    def __init__(self, m5Col='fiveSigmaDepth', metricName='CoaddM5', **kwargs):
        """Set up the metric.

        m5Col = name of the column holding the individual visit m5 data.
        metricName = name given to the metric (forwarded to BaseMetric).
        """
        super(Coaddm5Metric, self).__init__(col=m5Col, metricName=metricName, **kwargs)

    def run(self, dataSlice, slicePoint=None):
        """Return 1.25 * log10(sum(10 ** (0.8 * m5))) over the slice."""
        m5 = dataSlice[self.colname]
        summed = np.sum(10.0 ** (0.8 * m5))
        return 1.25 * np.log10(summed)

39 

class MaxMetric(BaseMetric):
    """Largest value found in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.max`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.max(values)

45 

class AbsMaxMetric(BaseMetric):
    """Largest absolute value found in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return the maximum of ``|column|``; ``slicePoint`` is not used."""
        magnitudes = np.abs(dataSlice[self.colname])
        return np.max(magnitudes)

51 

class MeanMetric(BaseMetric):
    """Arithmetic mean of the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.mean`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.mean(values)

57 

class AbsMeanMetric(BaseMetric):
    """Mean of the absolute values in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return the mean of ``|column|``; ``slicePoint`` is not used."""
        magnitudes = np.abs(dataSlice[self.colname])
        return np.mean(magnitudes)

63 

class MedianMetric(BaseMetric):
    """Median of the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.median`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.median(values)

69 

class AbsMedianMetric(BaseMetric):
    """Median of the absolute values in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return the median of ``|column|``; ``slicePoint`` is not used."""
        magnitudes = np.abs(dataSlice[self.colname])
        return np.median(magnitudes)

75 

class MinMetric(BaseMetric):
    """Smallest value found in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.min`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.min(values)

81 

class FullRangeMetric(BaseMetric):
    """Spread (maximum minus minimum) of the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``max - min`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        highest = np.max(values)
        lowest = np.min(values)
        return highest - lowest

87 

class RmsMetric(BaseMetric):
    """Standard deviation of the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.std`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.std(values)

93 

class SumMetric(BaseMetric):
    """Total of the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.sum`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return np.sum(values)

99 

class CountUniqueMetric(BaseMetric):
    """Number of distinct values in the metric's column of a simData slice.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return how many unique values the column holds; ``slicePoint`` is not used."""
        distinct = np.unique(dataSlice[self.colname])
        return np.size(distinct)

105 

class UniqueRatioMetric(BaseMetric):
    """Fraction of values in the column that are distinct (unique count / total count).
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``n_unique / n_total`` as a float; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        n_unique = np.size(np.unique(values))
        # The float denominator keeps this a true division whatever the interpreter.
        return n_unique / float(np.size(values))

113 

class CountMetric(BaseMetric):
    """Number of entries in the metric's column of a simData slice."""

    def __init__(self, col=None, **kwargs):
        """Forward ``col`` and ``kwargs`` to BaseMetric, then mark the result dtype as 'int'."""
        super(CountMetric, self).__init__(col=col, **kwargs)
        # Set after the base initialiser runs, so this value is the one that sticks.
        self.metricDtype = 'int'

    def run(self, dataSlice, slicePoint=None):
        """Return ``len`` of the column; ``slicePoint`` is not used."""
        values = dataSlice[self.colname]
        return len(values)

122 

class CountRatioMetric(BaseMetric):
    """Number of entries in the metric's column, divided by 'normVal'.
    """

    def __init__(self, col=None, normVal=1., metricName=None, **kwargs):
        """Store ``normVal`` as a float and build a default metric name if none is given."""
        self.normVal = float(normVal)
        name = metricName
        if name is None:
            name = 'CountRatio %s div %.1f' % (col, normVal)
        super(CountRatioMetric, self).__init__(col=col, metricName=name, **kwargs)

    def run(self, dataSlice, slicePoint=None):
        """Return ``len(column) / normVal``; ``slicePoint`` is not used."""
        n_entries = len(dataSlice[self.colname])
        return n_entries / self.normVal

134 

class CountSubsetMetric(BaseMetric):
    """Number of entries in the metric's column that are equal to 'subset'.
    """

    def __init__(self, col=None, subset=None, **kwargs):
        """Forward ``col``/``kwargs`` to BaseMetric, then record the match value.

        After the base initialiser runs, the dtype is set to 'int' and the
        bad value to 0.
        """
        super(CountSubsetMetric, self).__init__(col=col, **kwargs)
        self.metricDtype = 'int'
        self.badval = 0
        self.subset = subset

    def run(self, dataSlice, slicePoint=None):
        """Return how many column entries compare equal to ``self.subset``."""
        is_match = dataSlice[self.colname] == self.subset
        matching = np.where(is_match)[0]
        return len(matching)

147 

class RobustRmsMetric(BaseMetric):
    """Estimate the RMS of the column from its inter-quartile range.

    Outliers do not enter the calculation, which is what makes the estimate robust.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``(75th percentile - 25th percentile) / 1.349``."""
        values = dataSlice[self.colname]
        lower_quartile, upper_quartile = np.percentile(values, [25, 75])
        spread = upper_quartile - lower_quartile
        # 1.349 is the approximate IQR-to-sigma conversion used by the original code.
        return spread / 1.349

156 

class MaxPercentMetric(BaseMetric):
    """Percentage of the column's entries that are equal to the column maximum.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``100 * (count of entries == max) / (number of entries)``."""
        values = dataSlice[self.colname]
        at_peak = np.where(values == np.max(values))[0]
        return np.size(at_peak) / float(values.size) * 100.

164 

class AbsMaxPercentMetric(BaseMetric):
    """Percentage of entries whose absolute value equals the absolute value of the column maximum.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``100 * (count of |entry| == |max(column)|) / (number of entries)``.

        The reference value is the absolute value of the maximum, not the
        maximum of the absolute values.
        """
        values = dataSlice[self.colname]
        reference = np.abs(np.max(values))
        hits = np.where(np.abs(values) == reference)[0]
        return np.size(hits) / float(values.size) * 100.0

173 

class BinaryMetric(BaseMetric):
    """Flag whether the slice holds any data: 1 if it does, otherwise the metric's bad value.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return 1 when ``dataSlice.size`` is positive, else ``self.badval``."""
        return 1 if dataSlice.size > 0 else self.badval

182 

class FracAboveMetric(BaseMetric):
    """Fraction of column values at or above a cutoff, multiplied by 'scale'.
    """

    def __init__(self, col=None, cutoff=0.5, scale=1, metricName=None, **kwargs):
        """Record ``cutoff`` and ``scale``; build a default metric name if none is given."""
        # 'col' is named explicitly (rather than left inside kwargs) so the class
        # can also be instantiated with the column as a bare positional argument.
        name = metricName
        if name is None:
            name = 'FracAbove %.2f in %s' % (cutoff, col)
        super(FracAboveMetric, self).__init__(col, metricName=name, **kwargs)
        self.cutoff = cutoff
        self.scale = scale

    def run(self, dataSlice, slicePoint=None):
        """Return ``scale * (count of values >= cutoff) / (number of values)``."""
        values = dataSlice[self.colname]
        passing = np.where(values >= self.cutoff)[0]
        return np.size(passing) / float(np.size(values)) * self.scale

199 

class FracBelowMetric(BaseMetric):
    """Fraction of column values at or below a cutoff, multiplied by 'scale'.
    """

    def __init__(self, col=None, cutoff=0.5, scale=1, metricName=None, **kwargs):
        """Record ``cutoff`` and ``scale``; build a default metric name if none is given."""
        name = metricName
        if name is None:
            name = 'FracBelow %.2f %s' % (cutoff, col)
        super(FracBelowMetric, self).__init__(col, metricName=name, **kwargs)
        self.cutoff = cutoff
        self.scale = scale

    def run(self, dataSlice, slicePoint=None):
        """Return ``scale * (count of values <= cutoff) / (number of values)``."""
        values = dataSlice[self.colname]
        passing = np.where(values <= self.cutoff)[0]
        return np.size(passing) / float(np.size(values)) * self.scale

214 

class PercentileMetric(BaseMetric):
    """Value of the column at a chosen percentile.
    """

    def __init__(self, col=None, percentile=90, metricName=None, **kwargs):
        """Record ``percentile``; build a default metric name if none is given."""
        name = metricName
        if name is None:
            # The literal '%' is passed as an argument so it lands in the name unescaped.
            name = '%.0fth%sile %s' % (percentile, '%', col)
        super(PercentileMetric, self).__init__(col=col, metricName=name, **kwargs)
        self.percentile = percentile

    def run(self, dataSlice, slicePoint=None):
        """Return ``np.percentile(column, self.percentile)``."""
        values = dataSlice[self.colname]
        return np.percentile(values, self.percentile)

226 

class NoutliersNsigmaMetric(BaseMetric):
    """Count the entries of 'col' lying beyond mean + nSigma * (standard deviation).

    For nSigma >= 0 entries strictly above the boundary are counted; for
    nSigma < 0 entries strictly below it are counted.
    """

    def __init__(self, col=None, nSigma=3., metricName=None, **kwargs):
        """Record ``nSigma`` and ``col``; build a default metric name if none is given."""
        self.nSigma = nSigma
        self.col = col
        name = metricName
        if name is None:
            name = 'Noutliers %.1f %s' % (self.nSigma, self.col)
        super(NoutliersNsigmaMetric, self).__init__(col=col, metricName=name, **kwargs)
        self.metricDtype = 'int'

    def run(self, dataSlice, slicePoint=None):
        """Return the number of outliers on the side selected by the sign of nSigma."""
        values = dataSlice[self.colname]
        centre = np.mean(values)
        sigma = np.std(values)
        boundary = centre + self.nSigma * sigma
        if self.nSigma >= 0:
            # Non-negative nSigma: outliers sit above the mean-based boundary.
            flagged = np.where(values > boundary)
        else:
            # Negative nSigma: outliers sit below the mean-based boundary.
            flagged = np.where(values < boundary)
        return len(values[flagged])

250 

def _rotateAngles(angles):
    """Private helper shared by the '*Angle' metrics.

    Takes an array of angles (expected in radians, 0-2pi) and shifts them so
    that the angle just after the widest gap becomes 0. The widest gap then
    sits at the end of the series, so 'mean', 'rms' and 'fullrange' style
    calculations no longer see the 0/2pi discontinuity.

    Returns a tuple ``(rotation, rotated_angles)`` where ``rotated_angles`` is
    ``(angles - rotation) % twopi``.

    Raises ValueError when the wrap-around gap is below -2pi, which is taken
    as a sign that the input is in degrees.
    """
    order = np.argsort(angles)
    ordered = angles[order]
    # Gap from the largest angle, across the 0/2pi boundary, back to the smallest.
    wrap_gap = np.array([twopi - ordered[-1] + ordered[0]], float)
    if wrap_gap < -2.*np.pi:
        raise ValueError('Angular metrics expect radians, this seems to be in degrees')
    gaps = np.concatenate([np.diff(ordered), wrap_gap])
    widest = np.where(gaps == gaps.max())[0]
    if len(widest) > 1:
        # Several equally wide gaps: keep only the last one (still as a length-1 array).
        widest = widest[-1:]
    if widest == (len(angles)-1):
        # The widest gap is the wrap-around one: start from the smallest angle.
        rotation = ordered[0]
    else:
        rotation = ordered[widest+1][0]
    return (rotation, (angles - rotation) % twopi)

273 

class MeanAngleMetric(BaseMetric):
    """Mean of an angular column given in degrees.

    Unlike 'Mean', 'MeanAngle' handles the wrap-around at a full turn.
    """

    def run(self, dataSlice, slicePoint=None):
        """Compute the mean angle, in degrees, from averaged unit vectors.

        When the length of the averaged vector is below 0.1 the angles are
        close to uniformly distributed, and the mean is set to 180 degrees.
        """
        in_radians = np.radians(dataSlice[self.colname])
        meanx = np.mean(np.cos(in_radians))
        meany = np.mean(np.sin(in_radians))
        strength = np.sqrt(meanx**2 + meany**2)
        if strength < 0.1:
            mean = np.pi
        else:
            # arctan2 yields (-pi, pi]; the modulo folds it into [0, 2pi).
            mean = np.arctan2(meany, meanx) % twopi
        return np.degrees(mean)

294 

class RmsAngleMetric(BaseMetric):
    """Standard deviation of an angular column given in degrees.

    Unlike 'Rms', 'RmsAngle' handles the wrap-around at a full turn.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return the standard deviation, in degrees, of the rotated angles."""
        in_radians = np.radians(dataSlice[self.colname])
        # Only the rotated angles matter here; the rotation offset is discarded.
        _, rotated = _rotateAngles(in_radians)
        return np.std(np.degrees(rotated))

303 

class FullRangeAngleMetric(BaseMetric):
    """Full range of an angular column given in degrees.

    Unlike 'FullRange', 'FullRangeAngle' handles the wrap-around at a full turn.
    """

    def run(self, dataSlice, slicePoint=None):
        """Return ``max - min`` of the rotated angles, converted to degrees."""
        in_radians = np.radians(dataSlice[self.colname])
        # Only the rotated angles matter here; the rotation offset is discarded.
        _, rotated = _rotateAngles(in_radians)
        span = rotated.max() - rotated.min()
        return np.degrees(span)