Coverage for python/lsst/analysis/tools/actions/keyedData/calcDistances.py: 16% (117 statements)

coverage.py v7.4.4, created at 2024-04-10 04:57 -0700

# This file is part of analysis_tools.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
__all__ = ("CalcRelativeDistances",)

import astropy.units as u
import esutil
import numpy as np
from lsst.pex.config import Field
from smatch import Matcher

from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector


class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects within a given annulus of separations, then calculates
    the separation between their component source measurements from the
    individual visits. The RMS of these per-pair separations is used to
    calculate the astrometric relative repeatability metric, AMx, while the
    distribution of separation residuals is used to compute the ADx and AFx
    metrics.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : Per-pair rms of the separations measured in
              the individual visits, in mas (`np.array`).
            - ``separationResiduals`` : Absolute deviations of the separation
              residuals from their median, in mas (`np.array`).
            - ``AMx`` : AMx metric (`float`).
            - ``ADx`` : ADx metric (`float`).
            - ``AFx`` : AFx metric (`float`).
        """
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])
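        # _compressArray maps the arbitrary input group ids onto contiguous
        # indices 0..nGroup - 1; for example (illustrative only, not from the
        # source), ids [7, 7, 42, 3, 42] compress to [1, 1, 2, 0, 2].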

        nObj = groupId.max() + 1

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check whether the tract overlaps ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        if np.max(data[self.raKey]) > 330.0 and np.min(data[self.raKey]) < 30.0:
            rotation = 180.0
            raRotated = np.array(data[self.raKey]) - rotation
        else:
            raRotated = np.array(data[self.raKey])

        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, np.array(data[self.decKey]))
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        meanRa += rotation

        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])
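        # Worked out from the defaults above: annulus=5.0 and width=2.0 arcmin
        # select pair separations between 4 and 6 arcmin (about 0.067-0.1 deg).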

        # Match this catalog to itself within the annulus outer radius and
        # then cut to the annulus inner radius.
        with Matcher(meanRa, meanDec) as m:
            idx, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        inAnnulus = d > annulus[0]
        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        h, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind in range(len(i1)):
            objInd1 = i1[ind]
            objInd2 = i2[ind]
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(data[self.visitKey][obsInd1], data[self.visitKey][obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        separations = sphDist(
            np.deg2rad(np.array(data[self.raKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.raKey][matchedObsInd2])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd2])),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan
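        # For example (illustrative only): matchedPairInd = [0, 0, 1] with
        # separations [a, b, c] gives nSep = [2, 1], so sepMean = [(a + b) / 2, nan];
        # pairs sharing fewer than two visits drop out of the statistics.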

        # If there are no good pairs, return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            afThreshold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshold)
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent
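        # ADx is the (100 - threshAF) percentile of the absolute residuals,
        # and AFx is the percentage of residuals larger than threshAD.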

        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams

def sphDist(ra_mean, dec_mean, ra, dec):
    """Calculate distance on the surface of a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float`
        Mean RA in radians.
    dec_mean : `float`
        Mean Dec in radians.
    ra : `numpy.array` [`float`]
        Array of RA in radians.
    dec : `numpy.array` [`float`]
        Array of Dec in radians.

    Returns
    -------
    dist : `numpy.array` [`float`]
        Angular separation in radians.

    Notes
    -----
    Uses the Haversine formula to preserve accuracy at small angles.
    The law-of-cosines approach does not work well for the typically very
    small separations considered here.
    """
    # Haversine
    dra = ra - ra_mean
    ddec = dec - dec_mean
    a = np.square(np.sin(ddec / 2)) + np.cos(dec_mean) * np.cos(dec) * np.square(np.sin(dra / 2))
    dist = 2 * np.arcsin(np.sqrt(a))
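    # Sanity check (illustrative only): two points 1 arcsec apart in RA on the
    # equator give sphDist(0.0, 0.0, np.deg2rad(1 / 3600), 0.0) ~ 4.85e-6 rad,
    # i.e. 1 arcsec.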

    # This is what the law of cosines would look like:
    # dist = np.arccos(np.sin(dec_mean)*np.sin(dec) +
    #                  np.cos(dec_mean)*np.cos(dec)*np.cos(ra - ra_mean))

    # This will also work, but must run separately for each element,
    # whereas the numpy version will run on either scalars or arrays:
    # sp1 = geom.SpherePoint(ra_mean, dec_mean, geom.radians)
    # sp2 = geom.SpherePoint(ra, dec, geom.radians)
    # return sp1.separation(sp2).asRadians()

    return dist
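
A minimal usage sketch, not part of the file above: it assumes the action can be
instantiated with its config defaults and called directly on a dict of NumPy
arrays keyed by the default column names, and the catalog values below are
invented for illustration.

import numpy as np

# Hypothetical smoke test: 100 "objects", each observed in the same 3 visits,
# with positions drawn inside a small patch so that annulus pairs exist.
rng = np.random.RandomState(42)
data = {
    "obj_index": np.repeat(np.arange(100), 3),
    "visit": np.tile(np.array([101, 102, 103]), 100),
    "coord_ra": rng.uniform(149.9, 150.1, 300),  # degrees
    "coord_dec": rng.uniform(2.0, 2.2, 300),  # degrees
}

action = CalcRelativeDistances()  # defaults: 5 arcmin annulus, 2 arcmin wide
result = action(data)
print(result["AMx"], result["ADx"], result["AFx"])  # mas, mas, percent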