Coverage for python / lsst / analysis / tools / actions / keyedData / calcDistances.py: 16%
126 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-07 08:53 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-07 08:53 +0000
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21__all__ = ("CalcRelativeDistances",)
23import astropy.units as u
24import esutil
25import numpy as np
26from smatch import Matcher
28from lsst.pex.config import Field
30from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector
class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects at a given separation, then calculates the separation of
    their component source measurements from the individual visits. The RMS of
    these is used to calculate the astrometric relative repeatability metric,
    AMx, while the overall distribution of separations is used to compute the
    ADx and AFx metrics.
    """

    # Column keys identifying object groups, visits, and sky coordinates.
    # Coordinates are assumed to be in degrees (they are fed through
    # np.deg2rad before the spherical-distance computation below).
    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    # Geometry of the annulus used to select object pairs.
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    # Thresholds for the ADx/AFx metrics.
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    # Sampling controls that bound the cost of the pair search.
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    bruteForceRadiusThreshold = Field[float](
        doc="Matching radius at which to switch to brute-force matching (deg).",
        default=0.1,
    )
    bruteForceMaxMatch = Field[int](
        doc="Number of objects to sample for brute-force matching.",
        default=10_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # Declare the columns this action reads from the input KeyedData.
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : Per-object rms of separations (`np.array`).
            - ``separationResiduals`` : All separations minus per-object median
              (`np.array`)
            - ``AMx`` : AMx metric (`float`).
            - ``ADx`` : ADx metric (`float`).
            - ``AFx`` : AFx metric (`float`).
        """
        # Default (empty/NaN) result, returned unchanged whenever there is
        # no input data or no usable pairs.
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        # Seeded RNG so any downsampling below is reproducible.
        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            # Remap arbitrary integer ids to a dense 0..nGroups-1 range using
            # an esutil reverse-indexed histogram.
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])

        nObj = groupId.max() + 1

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check if tract is overlapping ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        if np.max(data[self.raKey]) > 330.0 and np.min(data[self.raKey]) < 30.0:
            rotation = 180.0
            raRotated = np.array(data[self.raKey]) - rotation
        else:
            raRotated = np.array(data[self.raKey])

        # Accumulate per-group sums in place; np.add.at handles repeated
        # indices correctly (unlike fancy-index +=).
        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, np.array(data[self.decKey]))
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        # Undo the ra=0 wrap rotation applied above.
        meanRa += rotation

        # Annulus bounds [inner, outer] in degrees.
        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])

        if annulus[1] >= self.bruteForceRadiusThreshold:
            # Use brute-force matching.
            if len(meanRa) > self.bruteForceMaxMatch:
                bruteForceSelection = rng.choice(len(meanRa), size=self.bruteForceMaxMatch, replace=False)
            else:
                bruteForceSelection = np.arange(len(meanRa))

            # All ordered index pairs from the sample.
            # NOTE(review): this includes both (i, j) and (j, i); self-pairs
            # (d == 0) are rejected by the annulus lower bound below as long
            # as annulus[0] > 0.
            i1 = np.tile(bruteForceSelection, len(bruteForceSelection))
            i2 = np.repeat(bruteForceSelection, len(bruteForceSelection))

            d = np.rad2deg(
                sphDist(
                    np.deg2rad(meanRa)[i1],
                    np.deg2rad(meanDec)[i1],
                    np.deg2rad(meanRa)[i2],
                    np.deg2rad(meanDec)[i2],
                ),
            )
        else:
            # Tree-based matching out to the outer annulus radius.
            with Matcher(meanRa, meanDec) as m:
                idx, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        # Keep only pairs whose mean-position separation lies in the annulus.
        inAnnulus = (d > annulus[0]) & (d < annulus[1])
        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        h, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind in range(len(i1)):
            objInd1 = i1[ind]
            objInd2 = i2[ind]
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(data[self.visitKey][obsInd1], data[self.visitKey][obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        # Per-visit separation (radians) between the two members of each pair.
        separations = sphDist(
            np.deg2rad(np.array(data[self.raKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.raKey][matchedObsInd2])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd2])),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        # A pair needs at least 2 shared visits to have a meaningful scatter.
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan

        # There are no good pairs, so return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        # marcsec == milliarcseconds (mas).
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            afThreshhold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshhold)
            # AFx: percentage of residuals exceeding the threshAD threshold.
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent

        # Strip astropy units; downstream consumers get plain floats/arrays.
        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams
def sphDist(ra_mean, dec_mean, ra, dec):
    """Calculate distance on the surface of a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float`
        Mean RA in radians.
    dec_mean : `float`
        Mean Dec in radians.
    ra : `numpy.array` [`float`]
        Array of RA in radians.
    dec : `numpy.array` [`float`]
        Array of Dec in radians.

    Returns
    -------
    dist : `numpy.array` [`float`]
        Angular separation(s) in radians.

    Notes
    -----
    Uses the Haversine formula to preserve accuracy at small angles.
    Law of cosines approach doesn't work well for the typically very small
    differences that we're looking at here.
    """
    # Half-angle differences for the Haversine formula.
    halfDeltaRa = (ra - ra_mean) / 2
    halfDeltaDec = (dec - dec_mean) / 2
    # hav = sin^2(ddec/2) + cos(dec1) * cos(dec2) * sin^2(dra/2)
    hav = np.sin(halfDeltaDec) ** 2 + np.cos(dec_mean) * np.cos(dec) * np.sin(halfDeltaRa) ** 2
    # An equivalent lsst.geom.SpherePoint.separation() call would also work,
    # but only element-by-element; this runs on scalars and arrays alike.
    return 2 * np.arcsin(np.sqrt(hav))