Coverage for python/lsst/analysis/tools/actions/keyedData/calcDistances.py: 16%
117 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-17 04:00 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-17 04:00 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21__all__ = ("CalcRelativeDistances",)
23import astropy.units as u
24import esutil
25import numpy as np
26from lsst.pex.config import Field
27from smatch import Matcher
29from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Vector
class CalcRelativeDistances(KeyedDataAction):
    """Calculate relative distances in a matched catalog.

    Given a catalog of matched sources from multiple visits, this finds all
    pairs of objects at a given separation, then calculates the separation of
    their component source measurements from the individual visits. The RMS of
    these is used to calculate the astrometric relative repeatability metric,
    AMx, while the overall distribution of separations is used to compute the
    ADx and AFx metrics.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    visitKey = Field[str](doc="Column key to use for matching visits", default="visit")
    raKey = Field[str](doc="RA column key", default="coord_ra")
    decKey = Field[str](doc="Dec column key", default="coord_dec")
    annulus = Field[float](doc="Radial distance of the annulus in arcmin", default=5.0)
    width = Field[float](doc="Width of annulus in arcmin", default=2.0)
    threshAD = Field[float](doc="Threshold in mas for AFx calculation.", default=20.0)
    threshAF = Field[float](
        doc="Percentile of differences that can vary by more than threshAD.", default=10.0
    )
    maxPairs = Field[int](
        doc="Maximum number of pairs to use; downsample otherwise.",
        default=100_000,
    )
    randomSeed = Field[int](
        doc="Random seed to use when downsampling.",
        default=12345,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the columns read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            ``(key, Vector)`` pairs for the group, RA, Dec and visit columns.
        """
        return (
            (self.groupKey, Vector),
            (self.raKey, Vector),
            (self.decKey, Vector),
            (self.visitKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Run the calculation.

        Parameters
        ----------
        data : KeyedData
            Catalog of data including coordinate, visit, and object group
            information. RA and Dec are read in degrees.

        Returns
        -------
        distanceParams : `dict`
            Dictionary of the calculated arrays and metrics with the following
            keys:

            - ``rmsDistances`` : Per-pair sample standard deviation of the
              separations measured in shared visits, in mas, for pairs with
              more than one shared visit (`np.array`).
            - ``separationResiduals`` : Absolute deviation of the per-pair
              separation residuals (separation minus pair mean) from their
              overall median, in mas, for pairs with more than two shared
              visits (`np.array`).
            - ``AMx`` : Median of ``rmsDistances`` in mas (`float`).
            - ``ADx`` : The ``100 - threshAF`` percentile of
              ``separationResiduals`` in mas (`float`).
            - ``AFx`` : Percentage of ``separationResiduals`` larger than
              ``threshAD`` (`float`).

            If there is no input, no pair in the annulus, or no pair with
            more than one shared visit, the arrays are empty and the metrics
            are NaN.
        """
        distanceParams = {
            "rmsDistances": np.array([]),
            "separationResiduals": np.array([]),
            "AMx": np.nan,
            "ADx": np.nan,
            "AFx": np.nan,
        }

        if len(data[self.groupKey]) == 0:
            return distanceParams

        rng = np.random.RandomState(seed=self.randomSeed)

        def _compressArray(arrayIn):
            # Map the group keys onto consecutive integers 0..nGroup-1,
            # ordered by the histogram bin each key falls in.
            h, rev = esutil.stat.histogram(arrayIn, rev=True)
            arrayOut = np.zeros(len(arrayIn), dtype=np.int32)
            (good,) = np.where(h > 0)
            for counter, ind in enumerate(good):
                arrayOut[rev[rev[ind] : rev[ind + 1]]] = counter
            return arrayOut

        groupId = _compressArray(data[self.groupKey])

        nObj = groupId.max() + 1

        # Fetch the columns once as plain arrays; all indices computed below
        # (from the histogram reverse indices) are positional.
        ra = np.asarray(data[self.raKey])
        dec = np.asarray(data[self.decKey])
        visit = np.asarray(data[self.visitKey])

        # Compute the meanRa/meanDec.
        meanRa = np.zeros(nObj)
        meanDec = np.zeros_like(meanRa)
        nObs = np.zeros_like(meanRa, dtype=np.int64)

        # Check if tract is overlapping ra=0 and rotate if so.
        # We assume a tract is smaller than 60x60 degrees.
        rotation = 0.0
        raRotated = ra
        if np.max(ra) > 330.0 and np.min(ra) < 30.0:
            rotation = 180.0
            # Wrap into [0, 360) after shifting so that coordinates on both
            # sides of ra=0 become contiguous; a bare subtraction would leave
            # them ~360 degrees apart and corrupt the mean of any object with
            # observations on both sides.
            raRotated = (ra - rotation) % 360.0

        np.add.at(meanRa, groupId, raRotated)
        np.add.at(meanDec, groupId, dec)
        np.add.at(nObs, groupId, 1)

        meanRa /= nObs
        meanDec /= nObs
        if rotation:
            # Undo the shift and wrap back into [0, 360).
            meanRa = (meanRa + rotation) % 360.0

        D = (self.annulus * u.arcmin).to_value(u.degree)
        width = (self.width * u.arcmin).to_value(u.degree)
        annulus = D + (width / 2) * np.array([-1, +1])

        # Match this catalog to itself within the radius and then cut
        # to the annulus inner radius.
        with Matcher(meanRa, meanDec) as m:
            _, i1, i2, d = m.query_self(annulus[1], return_indices=True)

        inAnnulus = d > annulus[0]
        i1 = i1[inAnnulus]
        i2 = i2[inAnnulus]

        if len(i1) == 0:
            return distanceParams

        if len(i1) > self.maxPairs:
            # Downsample the pairs.
            selection = rng.choice(len(i1), size=self.maxPairs, replace=False)
            i1 = i1[selection]
            i2 = i2[selection]

        # Match groups and get indices.
        _, rev = esutil.stat.histogram(groupId, rev=True)

        # Match together pairs that have the same visit.
        # It is unfortunate that this requires a loop, but it is not slow.
        # After this matching we have a set of matchedObsInd1/matchedObsInd2
        # that are all individual observations that are in the annulus and
        # share a visit. The matchedPairInd groups all the paired observations
        # of a given pair.
        matchedObsInd1 = []
        matchedObsInd2 = []
        matchedPairInd = []
        for ind, (objInd1, objInd2) in enumerate(zip(i1, i2)):
            obsInd1 = rev[rev[objInd1] : rev[objInd1 + 1]]
            obsInd2 = rev[rev[objInd2] : rev[objInd2 + 1]]
            a, b = esutil.numpy_util.match(visit[obsInd1], visit[obsInd2])
            matchedObsInd1.append(obsInd1[a])
            matchedObsInd2.append(obsInd2[b])
            matchedPairInd.append(np.full(len(a), ind))

        matchedObsInd1 = np.concatenate(matchedObsInd1)
        matchedObsInd2 = np.concatenate(matchedObsInd2)
        matchedPairInd = np.concatenate(matchedPairInd)

        # Separations (in radians) between the two members of each pair,
        # one value per shared visit.
        separations = sphDist(
            np.deg2rad(ra[matchedObsInd1]),
            np.deg2rad(dec[matchedObsInd1]),
            np.deg2rad(ra[matchedObsInd2]),
            np.deg2rad(dec[matchedObsInd2]),
        )

        # Compute the mean from the ragged array of pairs by
        # using np.add.at to sum numerator and denominator.
        sepMean = np.zeros(len(i1))
        nSep = np.zeros_like(sepMean, dtype=np.int32)
        np.add.at(sepMean, matchedPairInd, separations)
        np.add.at(nSep, matchedPairInd, 1)
        good = nSep > 1
        sepMean[good] /= nSep[good]
        sepMean[~good] = np.nan

        # There are no good pairs, so return the default.
        if good.sum() == 0:
            return distanceParams

        # Compute the stdev with sqrt(sum((sep - mean(sep))**2.)/(nsep - 1))
        sepStd = np.zeros_like(sepMean)
        np.add.at(
            sepStd,
            matchedPairInd,
            (separations - sepMean[matchedPairInd]) ** 2.0,
        )
        sepStd[good] = np.sqrt(sepStd[good] / (nSep[good] - 1))
        rmsDistances = sepStd[good]

        # Need sepResiduals, but only when nSep is > 2.
        # Pairs failing the cut get a NaN mean, so their residuals are NaN
        # and are dropped by the isfinite selection.
        bad2 = nSep <= 2
        sepMean[bad2] = np.nan
        sepResiduals = separations - sepMean[matchedPairInd]
        sepResiduals = sepResiduals[np.isfinite(sepResiduals)]

        # This is always going to be valid because we checked the number
        # of good pairs above.
        AMx = (np.median(rmsDistances) * u.radian).to(u.marcsec)

        # Because there is a more stringent selection for sepResiduals,
        # we need to check that we have enough to compute the metrics.
        if len(sepResiduals) <= 1:
            AFx = np.nan * u.percent
            ADx = np.nan * u.marcsec
            absDiffSeparations = np.array([]) * u.marcsec
        else:
            absDiffSeparations = (abs(sepResiduals - np.median(sepResiduals)) * u.radian).to(u.marcsec)
            afThreshhold = 100.0 - self.threshAF
            ADx = np.percentile(absDiffSeparations, afThreshhold)
            AFx = 100 * np.mean(np.abs(absDiffSeparations) > self.threshAD * u.marcsec) * u.percent

        distanceParams["rmsDistances"] = (rmsDistances * u.radian).to(u.marcsec).value
        distanceParams["separationResiduals"] = absDiffSeparations.value
        distanceParams["AMx"] = AMx.value
        distanceParams["ADx"] = ADx.value
        distanceParams["AFx"] = AFx.value

        return distanceParams
def sphDist(ra_mean, dec_mean, ra, dec):
    """Compute the angular separation between points on a unit sphere.

    This function was borrowed from faro.

    Parameters
    ----------
    ra_mean : `float` or `numpy.array` [`float`]
        RA of the reference position(s) in radians.
    dec_mean : `float` or `numpy.array` [`float`]
        Dec of the reference position(s) in radians.
    ra : `float` or `numpy.array` [`float`]
        RA of the comparison position(s) in radians.
    dec : `float` or `numpy.array` [`float`]
        Dec of the comparison position(s) in radians.

    Returns
    -------
    dist : `float` or `numpy.array` [`float`]
        Angular separation in radians.

    Notes
    -----
    The Haversine formula is used because it keeps its accuracy at small
    angles; the law-of-cosines form does not behave well for the very small
    separations this is typically applied to. All operations are numpy
    element-wise calls, so scalars and arrays are both accepted.
    """
    sinHalfDeltaDec = np.sin((dec - dec_mean) / 2)
    sinHalfDeltaRa = np.sin((ra - ra_mean) / 2)

    # Haversine of the separation: latitude term plus the longitude term
    # scaled by the cosines of both declinations.
    decTerm = np.square(sinHalfDeltaDec)
    raTerm = np.cos(dec_mean) * np.cos(dec) * np.square(sinHalfDeltaRa)
    haversine = decTerm + raTerm

    return 2 * np.arcsin(np.sqrt(haversine))