Coverage for python / lsst / analysis / tools / actions / keyedData / calcDistances.py: 16%

126 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-17 09:36 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

# Public API of this module.
__all__ = ("CalcRelativeDistances",)

22 

23import astropy.units as u 

24import esutil 

25import numpy as np 

26from smatch import Matcher 

27 

28from lsst.pex.config import Field 

29 

30from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector 

31 

32 

class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects at a given separation, then calculates the separation of
    their component source measurements from the individual visits. The RMS of
    these is used to calculate the astrometric relative repeatability metric,
    AMx, while the overall distribution of separations is used to compute the
    ADx and AFx metrics.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    bruteForceRadiusThreshold = Field[float](
        doc="Matching radius at which to switch to brute-force matching (deg).",
        default=0.1,
    )
    bruteForceMaxMatch = Field[int](
        doc="Number of objects to sample for brute-force matching.",
        default=10_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # The four vector columns consumed by __call__ below.
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : RMS of the per-visit separations for each
              object pair, in marcsec (`np.array`).
            - ``separationResiduals`` : Absolute deviations of the separation
              residuals (separation minus per-pair mean) from their overall
              median, in marcsec (`np.array`).
            - ``AMx`` : AMx metric in marcsec (`float`).
            - ``ADx`` : ADx metric in marcsec (`float`).
            - ``AFx`` : AFx metric in percent (`float`).
        """
        # Default (empty/NaN) result, returned whenever any stage of the
        # calculation finds nothing to work with.
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        # Seeded RNG so that any downsampling below is reproducible.
        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            # Remap arbitrary integer ids in arrayIn onto a dense
            # 0..n-1 index, using esutil's histogram reverse indices.
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])

        # Dense ids make max()+1 the number of distinct objects.
        nObj = groupId.max() + 1

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check if tract is overlapping ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        if np.max(data[self.raKey]) > 330.0 and np.min(data[self.raKey]) < 30.0:
            rotation = 180.0
            raRotated = np.array(data[self.raKey]) - rotation
        else:
            raRotated = np.array(data[self.raKey])

        # Accumulate per-object sums (np.add.at handles repeated indices),
        # then normalize to means and undo the RA rotation.
        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, np.array(data[self.decKey]))
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        meanRa += rotation

        # Annulus bounds in degrees: [D - width/2, D + width/2].
        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])

        if annulus[1] >= self.bruteForceRadiusThreshold:
            # Use brute-force matching.
            if len(meanRa) > self.bruteForceMaxMatch:
                bruteForceSelection = rng.choice(len(meanRa), size=self.bruteForceMaxMatch, replace=False)
            else:
                bruteForceSelection = np.arange(len(meanRa))

            # Full cartesian product of the selected objects.
            # NOTE(review): this contains both orderings of each pair and the
            # self-pairs (which fall outside the annulus at d=0); confirm this
            # matches the pair convention of Matcher.query_self below.
            i1 = np.tile(bruteForceSelection, len(bruteForceSelection))
            i2 = np.repeat(bruteForceSelection, len(bruteForceSelection))

            d = np.rad2deg(
                sphDist(
                    np.deg2rad(meanRa)[i1],
                    np.deg2rad(meanDec)[i1],
                    np.deg2rad(meanRa)[i2],
                    np.deg2rad(meanDec)[i2],
                ),
            )
        else:
            # Tree-based self-match out to the annulus outer radius (deg).
            with Matcher(meanRa, meanDec) as m:
                idx, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        # Keep only the pairs whose mean positions fall inside the annulus.
        inAnnulus = (d > annulus[0]) & (d < annulus[1])
        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        h, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind in range(len(i1)):
            objInd1 = i1[ind]
            objInd2 = i2[ind]
            # Reverse indices give the observation rows for each object.
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(data[self.visitKey][obsInd1], data[self.visitKey][obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        # Per-visit angular separation (radians) of each matched pair.
        separations = sphDist(
            np.deg2rad(np.array(data[self.raKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.raKey][matchedObsInd2])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd2])),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        # A pair needs at least 2 shared visits for an RMS to be defined.
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan

        # There are no good pairs, so return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2.
        # Marking sepMean NaN propagates into the residuals, which are then
        # removed by the isfinite filter below.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            # ADx: the (100 - threshAF) percentile of the absolute deviations;
            # AFx: the fraction of deviations exceeding threshAD, as a percent.
            afThreshhold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshhold)
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent

        # Strip units; all outputs are plain floats/arrays (marcsec, percent).
        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams

273 

274 

def sphDist(ra_mean, dec_mean, ra, dec):
    """Calculate distance on the surface of a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float`
        Mean RA in radians.
    dec_mean : `float`
        Mean Dec in radians.
    ra : `numpy.array` [`float`]
        Array of RA in radians.
    dec : `numpy.array` [`float`]
        Array of Dec in radians.

    Returns
    -------
    dist : `float` or `numpy.array` [`float`]
        Angular separation(s) in radians.

    Notes
    -----
    Uses the Haversine formula to preserve accuracy at small angles.
    The law-of-cosines form (arccos of the dot product) loses precision
    for the very small separations typical here, so it is avoided.
    """
    # Haversine: hav(d) = hav(ddec) + cos(dec1) * cos(dec2) * hav(dra),
    # with hav(x) = sin^2(x / 2).
    half_dra = (ra - ra_mean) / 2
    half_ddec = (dec - dec_mean) / 2
    haversine = np.sin(half_ddec) ** 2 + np.cos(dec_mean) * np.cos(dec) * np.sin(half_dra) ** 2
    return 2 * np.arcsin(np.sqrt(haversine))