Coverage for python / lsst / analysis / tools / actions / keyedData / calcDistances.py: 16%
126 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 09:09 +0000
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21__all__ = ("CalcRelativeDistances",)
23import astropy.units as u
24import esutil
25import numpy as np
26from lsst.pex.config import Field
27from smatch import Matcher
29from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector
class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects at a given separation, then calculates the separation of
    their component source measurements from the individual visits. The RMS of
    these is used to calculate the astrometric relative repeatability metric,
    AMx, while the overall distribution of separations is used to compute the
    ADx and AFx metrics.
    """

    # Column keys identifying the object group, visit, and coordinates in
    # the input KeyedData.  Coordinates are assumed to be in degrees (they
    # are passed through deg2rad before sphDist below).
    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    # Geometry of the annulus used to select object pairs.
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    # Thresholds for the ADx/AFx metrics.
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    # When the outer annulus radius is at least this large, tree-based
    # matching is replaced with all-pairs matching over a subsample.
    bruteForceRadiusThreshold = Field[float](
        doc="Matching radius at which to switch to brute-force matching (deg).",
        default=0.1,
    )
    bruteForceMaxMatch = Field[int](
        doc="Number of objects to sample for brute-force matching.",
        default=10_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # Declare the vector columns this action reads from the input data.
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : Per-object rms of separations (`np.array`).
            - ``separationResiduals`` : All separations minus per-object median
              (`np.array`)
            - ``AMx`` : AMx metric (`float`).
            - ``ADx`` : ADx metric (`float`).
            - ``AFx`` : AFx metric (`float`).
        """
        # Default return value, used whenever there is not enough data to
        # compute the metrics.
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        # Fixed seed so that any downsampling below is reproducible.
        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            # Remap arbitrary integer labels in arrayIn onto a contiguous
            # 0..n-1 range, using esutil's reverse-indices histogram to find
            # the members of each label.
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])

        nObj = groupId.max() + 1

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check if tract is overlapping ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        if np.max(data[self.raKey]) > 330.0 and np.min(data[self.raKey]) < 30.0:
            rotation = 180.0
            raRotated = np.array(data[self.raKey]) - rotation
        else:
            raRotated = np.array(data[self.raKey])

        # Accumulate per-object sums with unbuffered adds, then normalize to
        # get the per-object mean position; undo the ra=0 rotation afterward.
        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, np.array(data[self.decKey]))
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        meanRa += rotation

        # Annulus inner/outer radii in degrees: [D - width/2, D + width/2].
        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])

        if annulus[1] >= self.bruteForceRadiusThreshold:
            # Use brute-force matching.
            # All-pairs distances over (at most bruteForceMaxMatch) sampled
            # objects; note this includes both (i, j) and (j, i) orderings.
            if len(meanRa) > self.bruteForceMaxMatch:
                bruteForceSelection = rng.choice(len(meanRa), size=self.bruteForceMaxMatch, replace=False)
            else:
                bruteForceSelection = np.arange(len(meanRa))

            i1 = np.tile(bruteForceSelection, len(bruteForceSelection))
            i2 = np.repeat(bruteForceSelection, len(bruteForceSelection))

            d = np.rad2deg(
                sphDist(
                    np.deg2rad(meanRa)[i1],
                    np.deg2rad(meanDec)[i1],
                    np.deg2rad(meanRa)[i2],
                    np.deg2rad(meanDec)[i2],
                ),
            )
        else:
            # Tree-based self-match out to the outer annulus radius (deg).
            with Matcher(meanRa, meanDec) as m:
                idx, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        # Keep only object pairs whose mean positions fall in the annulus.
        inAnnulus = (d > annulus[0]) & (d < annulus[1])

        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        h, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind in range(len(i1)):
            objInd1 = i1[ind]
            objInd2 = i2[ind]
            # Reverse indices give the observation rows for each object.
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(data[self.visitKey][obsInd1], data[self.visitKey][obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        # Per-visit separation (radians) between the two objects in each pair.
        separations = sphDist(
            np.deg2rad(np.array(data[self.raKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd1])),
            np.deg2rad(np.array(data[self.raKey][matchedObsInd2])),
            np.deg2rad(np.array(data[self.decKey][matchedObsInd2])),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        # A pair needs at least 2 shared visits to have a defined scatter.
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan

        # There are no good pairs, so return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        # NaN means from bad pairs only contaminate bad slots of sepStd,
        # which are excluded by the `good` mask below.
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            # ADx: the (100 - threshAF)th percentile of the absolute residual
            # separations; AFx: percentage of residuals exceeding threshAD.
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            afThreshhold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshhold)
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent

        # Strip units; values are reported in milliarcseconds (and percent
        # for AFx).
        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams
def sphDist(ra_mean, dec_mean, ra, dec):
    """Calculate distance on the surface of a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float` or `numpy.array` [`float`]
        Mean RA in radians.
    dec_mean : `float` or `numpy.array` [`float`]
        Mean Dec in radians.
    ra : `numpy.array` [`float`]
        Array of RA in radians.
    dec : `numpy.array` [`float`]
        Array of Dec in radians.

    Returns
    -------
    dist : `float` or `numpy.array` [`float`]
        Angular separation(s) in radians.

    Notes
    -----
    Uses the Haversine formula to preserve accuracy at small angles.
    Law of cosines approach doesn't work well for the typically very small
    differences that we're looking at here.
    """
    # Haversine
    dra = ra - ra_mean
    ddec = dec - dec_mean
    a = np.square(np.sin(ddec / 2)) + np.cos(dec_mean) * np.cos(dec) * np.square(np.sin(dra / 2))
    # Clamp to the arcsin domain: floating-point round-off can push ``a``
    # fractionally above 1 for near-antipodal points, which would otherwise
    # yield NaN from arcsin.
    dist = 2 * np.arcsin(np.minimum(1.0, np.sqrt(a)))

    return dist