Coverage for python / lsst / analysis / tools / actions / keyedData / calcDistances.py: 16%

126 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-22 09:09 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21__all__ = ("CalcRelativeDistances",) 

22 

23import astropy.units as u 

24import esutil 

25import numpy as np 

26from lsst.pex.config import Field 

27from smatch import Matcher 

28 

29from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector 

30 

31 

class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects at a given separation, then calculates the separation of
    their component source measurements from the individual visits. The RMS of
    these is used to calculate the astrometric relative repeatability metric,
    AMx, while the overall distribution of separations is used to compute the
    ADx and AFx metrics.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    bruteForceRadiusThreshold = Field[float](
        doc="Matching radius at which to switch to brute-force matching (deg).",
        default=0.1,
    )
    bruteForceMaxMatch = Field[int](
        doc="Number of objects to sample for brute-force matching.",
        default=10_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # This action consumes the group id, coordinate, and visit columns.
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : Per-pair rms of the per-visit separations,
              in mas (`np.array`).
            - ``separationResiduals`` : Absolute deviations of the separation
              residuals (separation minus per-pair mean) from their median,
              in mas (`np.array`).
            - ``AMx`` : AMx metric in mas (`float`).
            - ``ADx`` : ADx metric in mas (`float`).
            - ``AFx`` : AFx metric in percent (`float`).
        """
        # Default (empty/NaN) outputs, returned whenever there is not
        # enough data to measure the metrics.
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        # Seeded RNG so any downsampling below is reproducible.
        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            # Remap arbitrary integer ids to a dense 0..nUnique-1 range so
            # they can be used directly as array indices.
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])

        nObj = groupId.max() + 1

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check if tract is overlapping ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        if np.max(data[self.raKey]) > 330.0 and np.min(data[self.raKey]) < 30.0:
            rotation = 180.0
            raRotated = np.array(data[self.raKey]) - rotation
        else:
            raRotated = np.array(data[self.raKey])

        # Accumulate per-group sums, then normalize to get each object's
        # mean position over all of its observations.
        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, np.array(data[self.decKey]))
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        meanRa += rotation  # Undo the ra=0 rotation, if any.

        # Annulus bounds in degrees: [D - width/2, D + width/2].
        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])

        if annulus[1] >= self.bruteForceRadiusThreshold:
            # Use brute-force matching: compute all pairwise distances over
            # a (possibly downsampled) object set instead of a tree match.
            if len(meanRa) > self.bruteForceMaxMatch:
                bruteForceSelection = rng.choice(len(meanRa), size=self.bruteForceMaxMatch, replace=False)
            else:
                bruteForceSelection = np.arange(len(meanRa))

            # All ordered index pairs, including self-pairs; self-pairs
            # (d == 0) are removed by the annulus cut below when
            # annulus[0] > 0.
            i1 = np.tile(bruteForceSelection, len(bruteForceSelection))
            i2 = np.repeat(bruteForceSelection, len(bruteForceSelection))

            # sphDist works in radians; convert back to degrees to match
            # the units the Matcher branch returns.
            d = np.rad2deg(
                sphDist(
                    np.deg2rad(meanRa)[i1],
                    np.deg2rad(meanDec)[i1],
                    np.deg2rad(meanRa)[i2],
                    np.deg2rad(meanDec)[i2],
                ),
            )
        else:
            with Matcher(meanRa, meanDec) as m:
                idx, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        # Keep only object pairs whose mean positions fall in the annulus.
        inAnnulus = (d > annulus[0]) & (d < annulus[1])
        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        h, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind in range(len(i1)):
            objInd1 = i1[ind]
            objInd2 = i2[ind]
            # Reverse indices give the observation rows for each object.
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(data[self.visitKey][obsInd1], data[self.visitKey][obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        # Per-visit separation of each matched pair, in radians.
        separations = sphDist(
            np.deg2rad(np.array(data[self.raKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.raKey][matchedObsInd2])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd2])),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        # A pair needs at least 2 shared visits for a meaningful mean/std.
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan

        # There are no good pairs, so return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2; NaN-ing the mean
        # of the excluded pairs drops their residuals via the finite cut.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            # Absolute deviation of residuals from their median, in mas.
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            # ADx is the (100 - threshAF) percentile of those deviations.
            afThreshhold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshhold)
            # AFx is the percentage of deviations exceeding threshAD mas.
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent

        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams

272 

273 

def sphDist(ra_mean, dec_mean, ra, dec):
    """Compute angular distance on the surface of a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float`
        Mean RA in radians.
    dec_mean : `float`
        Mean Dec in radians.
    ra : `numpy.array` [`float`]
        Array of RA in radians.
    dec : `numpy.array` [`float`]
        Array of Dec in radians.

    Returns
    -------
    dist : `numpy.array` [`float`]
        Angular separation(s) in radians.

    Notes
    -----
    Uses the Haversine formula to preserve accuracy at small angles.
    Law of cosines approach doesn't work well for the typically very small
    differences that we're looking at here.
    """
    # Half-angle differences feed directly into the haversine terms.
    halfDeltaRa = 0.5 * (ra - ra_mean)
    halfDeltaDec = 0.5 * (dec - dec_mean)

    # Haversine: hav(d) = hav(ddec) + cos(dec1) * cos(dec2) * hav(dra)
    haversine = (
        np.sin(halfDeltaDec) ** 2
        + np.cos(dec_mean) * np.cos(dec) * np.sin(halfDeltaRa) ** 2
    )
    return 2.0 * np.arcsin(np.sqrt(haversine))