Coverage for python/lsst/analysis/tools/actions/vector/calcRhoStatistics.py: 39%
106 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-11 03:11 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-11 03:11 -0800
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "BinnedCorr2Config",
26 "CalcRhoStatistics",
27)
29import logging
30from typing import TYPE_CHECKING, Any, Mapping, cast
32import numpy as np
33import treecorr # type: ignore[import]
34from lsst.pex.config import ChoiceField, Config, ConfigField, Field, FieldValidationError
36from ...interfaces import KeyedData, KeyedDataAction, Vector
37from .calcShapeSize import CalcShapeSize
38from .ellipticity import CalcE, CalcEDiff
39from .vectorActions import FractionalDifference
41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true
42 from treecorr import GGCorrelation, KKCorrelation
44 from ...interfaces import KeyedDataSchema
46_LOG = logging.getLogger(__name__)
class BinnedCorr2Config(Config):
    """Configuration mirroring a subset of the `treecorr` binning options.

    Each field maps onto a keyword argument accepted by ``BinnedCorr2``, the
    `treecorr` base class for two-point correlation functions, and the
    defaults follow those of `treecorr` v4.2. The options 'verbose',
    'log_file', 'output_dots', 'rng' and 'pairwise' (deprecated) are
    intentionally not exposed. See the `treecorr` documentation for the
    meaning of every option:
    https://rmjarvis.github.io/TreeCorr/_build/html/correlation2.html

    A dedicated config class is preferred over a
    `~lsst.pex.config.DictField` because it allows values of mixed types and
    lets the configuration be validated before any computation starts.

    Notes
    -----
    This class is meant to be used by `CalcRhoStatistics` and only carries
    the options that matter for computing rho statistics.
    """

    nbins = Field[int](
        doc=(
            "How many bins to use. (Exactly three of nbins, bin_size, min_sep, "
            "max_sep are required. If nbins is not given, it will be calculated "
            "from the values of the other three, rounding up to the next highest "
            "integer. In this case, bin_size will be readjusted to account for "
            "this rounding up."
        ),
        optional=True,
        check=lambda value: value > 0,
    )

    bin_size = Field[float](
        doc=(
            "The width of the bins in log(separation). Exactly three of nbins, "
            "bin_size, min_sep, max_sep are required. If bin_size is not given, "
            "it will be calculated from the values of the other three."
        ),
        optional=True,
    )

    min_sep = Field[float](
        doc=(
            "The minimum separation in units of sep_units, if relevant. Exactly "
            "three of nbins, bin_size, min_sep, max_sep are required. If min_sep "
            "is not given, it will be calculated from the values of the other "
            "three."
        ),
        optional=True,
    )

    max_sep = Field[float](
        doc=(
            "The maximum separation in units of sep_units, if relevant. Exactly "
            "three of nbins, bin_size, min_sep, max_sep are required. If max_sep "
            "is not given, it will be calculated from the values of the other "
            "three."
        ),
        optional=True,
    )

    sep_units = ChoiceField[str](
        doc=(
            "The units to use for the separation values, given as a string. This "
            "includes both min_sep and max_sep above, as well as the units of the "
            "output distance values."
        ),
        default="radian",
        optional=True,
        # Each allowed value documents itself.
        allowed={
            "arcsec": "arcsec",
            "arcmin": "arcmin",
            "degree": "degree",
            "hour": "hour",
            "radian": "radian",
        },
    )

    bin_slop = Field[float](
        doc=(
            "How much slop to allow in the placement of pairs in the bins. If "
            "bin_slop = 1, then the bin into which a particular pair is placed may "
            "be incorrect by at most 1.0 bin widths. If None, use a bin_slop that "
            "gives a maximum error of 10% on any bin, which has been found to "
            "yield good results for most applications."
        ),
        default=None,
        optional=True,
    )

    precision = Field[int](
        doc="The precision to use for the output values. This specifies how many digits to write.",
        default=4,
        optional=True,
        check=lambda value: value > 0,
    )

    metric = ChoiceField[str](
        doc=(
            "Which metric to use for distance measurements. For details, see "
            "https://rmjarvis.github.io/TreeCorr/_build/html/metric.html"
        ),
        default="Euclidean",
        optional=True,
        allowed={
            "Euclidean": "straight-line Euclidean distance between two points",
            "FisherRperp": (
                "the perpendicular component of the distance, following the "
                "definitions in Fisher et al, 1994 (MNRAS, 267, 927)"
            ),
            "OldRperp": (
                "the perpendicular component of the distance using the definition "
                "of Rperp from TreeCorr v3.x."
            ),
            "Rlens": (
                "Distance from the first object (taken to be a lens) to the line "
                "connecting Earth and the second object (taken to be a lensed "
                "source)."
            ),
            "Arc": "the true great circle distance for spherical coordinates.",
            "Periodic": "Like ``Euclidean``, but with periodic boundaries.",
        },
    )

    bin_type = ChoiceField[str](
        doc="What type of binning should be used?",
        default="Log",
        optional=True,
        allowed={
            "Log": (
                "Logarithmic binning in the distance. The bin steps will be "
                "uniform in log(r) from log(min_sep) .. log(max_sep)."
            ),
            "Linear": (
                "Linear binning in the distance. The bin steps will be uniform in "
                "r from min_sep .. max_sep."
            ),
            "TwoD": (
                "2-dimensional binning from x = (-max_sep .. max_sep) and "
                "y = (-max_sep .. max_sep). The bin steps will be uniform in both "
                "x and y. (i.e. linear in x,y)"
            ),
        },
    )

    var_method = ChoiceField[str](
        doc="Which method to use for estimating the variance",
        default="shot",
        optional=True,
        # Each allowed value documents itself.
        allowed={
            "shot": "shot",
            "jackknife": "jackknife",
            "sample": "sample",
            "bootstrap": "bootstrap",
            "marked_bootstrap": "marked_bootstrap",
        },
    )

    num_bootstrap = Field[int](
        doc="How many bootstrap samples to use for the 'bootstrap' and 'marked_bootstrap' var methods.",
        default=500,
        optional=True,
    )

    def validate(self):
        # Docs inherited from base class
        super().validate()

        # The binning is fully determined by any three of these four values,
        # so exactly one of them has to be left unset.
        binningValues = [self.nbins, self.bin_size, self.min_sep, self.max_sep]
        numSet = len([value for value in binningValues if value is not None])
        if numSet != 3:
            raise FieldValidationError(
                type(self).bin_size,
                self,
                "You must specify exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``"
                f" in treecorr_config. {numSet} parameters were set instead.",
            )

        # The ordering check only makes sense when both limits are given.
        if self.min_sep is None or self.max_sep is None:
            return
        if self.min_sep > self.max_sep:
            raise FieldValidationError(type(self).min_sep, self, "min_sep must be <= max_sep")
class CalcRhoStatistics(KeyedDataAction):
    r"""Calculate rho statistics.

    Rho statistics refer to a collection of correlation functions involving
    PSF ellipticity and size residuals. They quantify the contribution from
    PSF leakage due to errors in PSF modeling to the weak lensing shear
    correlation functions. The standard rho statistics are indexed from 1 to
    5; this action additionally calculates the auto-correlation of the
    fractional size residuals, which is returned under the key ``rho3alt``.

    Notes
    -----
    The exact definitions of rho statistics as defined in [1]_ are given
    below.

    .. math::

       \rho_1(\theta) &= \left\langle
           \delta e^*_{PSF}(x)
           \delta e_{PSF}(x+\theta)
           \right\rangle

       \rho_2(\theta) &= \left\langle
           e^*_{PSF}(x)
           \delta e_{PSF}(x+\theta)
           \right\rangle

       \rho_3(\theta) &= \left\langle
           (e^*_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle

       \rho_4(\theta) &= \left\langle
           \delta e^*_{PSF}(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle

       \rho_5(\theta) &= \left\langle
           e^*_{PSF}(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle

    There is a slightly different version for :math:`\rho_3( \theta )`, used
    in Melchior et al. (2015) [2]_. It is the auto-correlation function of
    the fractional size residuals and is the quantity stored as ``rho3alt``.

    .. math::

       \rho'_3(\theta) &= \left\langle
           \frac{\delta T_{PSF}}{T_{PSF}}(x)
           \frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)
           \right\rangle

    The definition of ellipticity used in [1]_ corresponds to ``shear``-type
    ellipticity. Rho statistics computed from ``shear``-type ellipticities
    are typically smaller by a factor of 4 than those computed directly from
    ``distortion``-type ellipticities. To remove that dependence, this action
    divides ``distortion``-type ellipticities by a responsivity of 2 before
    correlating them.

    References
    ----------
    .. [1] Jarvis, M., Sheldon, E., Zuntz, J., Kacprzak, T., Bridle, S. L.,
           et. al (2016).
           The DES Science Verification weak lensing shear catalogues
           MNRAS, 460, 2245–2281.
           https://doi.org/10.1093/mnras/stw990;
           https://arxiv.org/abs/1507.05603
    .. [2] Melchior, P., et. al (2015)
           Mass and galaxy distributions of four massive galaxy clusters from
           Dark Energy Survey Science Verification data
           MNRAS, 449, no. 3, pp. 2219–2238.
           https://doi.org/10.1093/mnras/stv398
           https://arxiv.org/abs/1405.4285
    """

    colRa = Field[str](doc="RA column", default="coord_ra")

    colDec = Field[str](doc="Dec column", default="coord_dec")

    colXx = Field[str](doc="The column name to get the xx shape component from.", default="{band}_ixx")

    colYy = Field[str](doc="The column name to get the yy shape component from.", default="{band}_iyy")

    colXy = Field[str](doc="The column name to get the xy shape component from.", default="{band}_ixy")

    colPsfXx = Field[str](
        doc="The column name to get the PSF xx shape component from.", default="{band}_ixxPSF"
    )

    colPsfYy = Field[str](
        doc="The column name to get the PSF yy shape component from.", default="{band}_iyyPSF"
    )

    colPsfXy = Field[str](
        doc="The column name to get the PSF xy shape component from.", default="{band}_ixyPSF"
    )

    ellipticityType = ChoiceField[str](
        doc="The type of ellipticity to calculate",
        allowed={
            "distortion": "Distortion, measured as (Ixx - Iyy)/(Ixx + Iyy)",
            "shear": ("Shear, measured as (Ixx - Iyy)/(Ixx + Iyy + 2*sqrt(Ixx*Iyy - Ixy**2))"),
        },
        default="distortion",
    )

    sizeType = ChoiceField[str](
        doc="The type of size to calculate",
        default="trace",
        allowed={
            "trace": "trace radius",
            "determinant": "determinant radius",
        },
    )

    treecorr = ConfigField[BinnedCorr2Config](
        doc="TreeCorr configuration",
    )

    def setDefaults(self):
        """Set the default separation units and maximum separation.

        Only ``sep_units`` and ``max_sep`` are set here. Because
        `BinnedCorr2Config.validate` requires exactly three of ``nbins``,
        ``bin_size``, ``min_sep`` and ``max_sep``, two more of them have to
        be configured before the action can be validated.
        """
        super().setDefaults()
        self.treecorr = BinnedCorr2Config()
        self.treecorr.sep_units = "arcmin"
        self.treecorr.max_sep = 100.0

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the columns read by this action, all typed as `Vector`."""
        return (
            (self.colRa, Vector),
            (self.colDec, Vector),
            (self.colXx, Vector),
            (self.colYy, Vector),
            (self.colXy, Vector),
            (self.colPsfXx, Vector),
            (self.colPsfYy, Vector),
            (self.colPsfXy, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Compute all rho statistics for the given catalog.

        Parameters
        ----------
        data : `KeyedData`
            Catalog holding the columns listed by `getInputSchema`.
        **kwargs
            Forwarded unchanged to the ellipticity and size sub-actions.

        Returns
        -------
        rhoStats : `KeyedData`
            Mapping with keys ``rho1`` to ``rho5``, each holding the result
            of `_corrSpin2`, and ``rho3alt``, holding the result of
            `_corrSpin0`.

        Notes
        -----
        The RA and Dec columns are passed to the correlators without
        explicit units, so they are interpreted with the correlators'
        default of ``"degrees"``.
        """
        calcEMeas = CalcE(
            colXx=self.colXx,
            colYy=self.colYy,
            colXy=self.colXy,
            ellipticityType=self.ellipticityType,
        )
        calcEpsf = CalcE(
            colXx=self.colPsfXx,
            colYy=self.colPsfYy,
            colXy=self.colPsfXy,
            ellipticityType=self.ellipticityType,
        )

        calcEDiff = CalcEDiff(colA=calcEMeas, colB=calcEpsf)

        calcSizeResidual = FractionalDifference(
            actionA=CalcShapeSize(
                colXx=self.colXx,
                colYy=self.colYy,
                colXy=self.colXy,
                sizeType=self.sizeType,
            ),
            actionB=CalcShapeSize(
                colXx=self.colPsfXx,
                colYy=self.colPsfYy,
                colXy=self.colPsfXy,
                sizeType=self.sizeType,
            ),
        )

        # distortion-type ellipticity has a shear response of 2, so we need to
        # divide by 2 so that the rho-stats do not depend on the
        # ellipticity-type.
        # Note: For distortion, the responsivity is 2(1 - e^2_{rms}),
        # where e_rms is the root mean square ellipticity per component.
        # This is expected to be small and we ignore it here.
        # This definition of responsivity is consistent with the definitions
        # used in the rho-statistics calculations for the HSC shear catalog
        # papers (Mandelbaum et al. 2018, Li et al., 2022).
        responsivity = 2.0 if self.ellipticityType == "distortion" else 1.0

        # Call the actions on the data.
        eMEAS = calcEMeas(data, **kwargs)
        if self.ellipticityType == "distortion":
            _LOG.debug("Correction value of responsitivity would be %f", 2 - np.mean(np.abs(eMEAS) ** 2))
        eMEAS /= responsivity  # type: ignore
        e1, e2 = np.real(eMEAS), np.imag(eMEAS)
        eRes = calcEDiff(data, **kwargs)
        eRes /= responsivity  # type: ignore
        e1Res, e2Res = np.real(eRes), np.imag(eRes)
        sizeRes = calcSizeResidual(data, **kwargs)

        # Scale the sizeRes by ellipticities
        e1SizeRes = e1 * sizeRes
        e2SizeRes = e2 * sizeRes

        # Package the arguments to capture auto-/cross-correlations for the
        # Rho statistics. A trailing `None` selects the auto-correlation of
        # the leading field(s). Index 0 holds the scalar field for rho3alt.
        args = {
            0: (sizeRes, None),
            1: (e1Res, e2Res, None, None),
            2: (e1, e2, e1Res, e2Res),
            3: (e1SizeRes, e2SizeRes, None, None),
            4: (e1Res, e2Res, e1SizeRes, e2SizeRes),
            5: (e1, e2, e1SizeRes, e2SizeRes),
        }

        ra: Vector = data[self.colRa]  # type: ignore
        dec: Vector = data[self.colDec]  # type: ignore

        treecorrKwargs = self.treecorr.toDict()

        # Pass the appropriate arguments to the correlator and build a dict
        rhoStats: Mapping[str, treecorr.BinnedCorr2] = {}
        for rhoIndex in range(1, 6):
            _LOG.info("Calculating rho-%d", rhoIndex)
            rhoStats[f"rho{rhoIndex}"] = self._corrSpin2(  # type: ignore[index]
                ra, dec, *(args[rhoIndex]), **treecorrKwargs
            )

        _LOG.info("Calculating rho3alt")
        rhoStats["rho3alt"] = self._corrSpin0(ra, dec, *(args[0]), **treecorrKwargs)  # type: ignore[index]
        return cast(KeyedData, rhoStats)

    @classmethod
    def _corrSpin0(
        cls,
        ra: Vector,
        dec: Vector,
        k1: Vector,
        k2: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        **treecorrKwargs: Any,
    ) -> KKCorrelation:
        """Function to compute correlations between at most two scalar fields.

        This is used to compute rho3alt statistics, given the appropriate
        spin-0 (scalar) fields, usually fractional size residuals.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        k1 : `numpy.array`
            The primary scalar field.
        k2 : `numpy.array`, optional
            The secondary scalar field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        **treecorrKwargs
            Keyword arguments to be passed to `treecorr.KKCorrelation`.

        Returns
        -------
        xy : `treecorr.KKCorrelation`
            A `treecorr.KKCorrelation` object containing the correlation
            function.
        """
        numEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting KKCorrelation cannot exceed %d",
            numEntries,
            # Integer division keeps the bound exact for large catalogs.
            numEntries * (numEntries - 1) // 2,
        )
        xy = treecorr.KKCorrelation(**treecorrKwargs)
        catA = treecorr.Catalog(ra=ra, dec=dec, k=k1, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
        if k2 is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            catB = treecorr.Catalog(ra=ra, dec=dec, k=k2, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over every axis, so the total is a scalar even when
        # npairs is multi-dimensional (e.g. with bin_type="TwoD").
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy

    @classmethod
    def _corrSpin2(
        cls,
        ra: Vector,
        dec: Vector,
        g1a: Vector,
        g2a: Vector,
        g1b: Vector | None = None,
        g2b: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        **treecorrKwargs: Any,
    ) -> GGCorrelation:
        """Function to compute correlations between shear-like fields.

        This is used to compute Rho statistics, given the appropriate spin-2
        (shear-like) fields.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        g1a : `numpy.array`
            The first component of the primary shear-like field.
        g2a : `numpy.array`
            The second component of the primary shear-like field.
        g1b : `numpy.array`, optional
            The first component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        g2b : `numpy.array`, optional
            The second component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        **treecorrKwargs
            Keyword arguments to be passed to `treecorr.GGCorrelation`.

        Returns
        -------
        xy : `treecorr.GGCorrelation`
            A `treecorr.GGCorrelation` object containing the correlation
            function.

        Notes
        -----
        The cross-correlation is computed only when both ``g1b`` and ``g2b``
        are given. If either one is `None`, the other is ignored and the
        auto-correlation of the primary field is computed.
        """
        numEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting GGCorrelation cannot exceed %d",
            numEntries,
            # Integer division keeps the bound exact for large catalogs.
            numEntries * (numEntries - 1) // 2,
        )
        xy = treecorr.GGCorrelation(**treecorrKwargs)
        catA = treecorr.Catalog(
            ra=ra, dec=dec, g1=g1a, g2=g2a, ra_units=raUnits, dec_units=decUnits, logger=_LOG
        )
        if g1b is None or g2b is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            catB = treecorr.Catalog(
                ra=ra, dec=dec, g1=g1b, g2=g2b, ra_units=raUnits, dec_units=decUnits, logger=_LOG
            )
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over every axis, so the total is a scalar even when
        # npairs is multi-dimensional (e.g. with bin_type="TwoD").
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy