Coverage for python/lsst/analysis/tools/actions/vector/calcRhoStatistics.py: 39%
108 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 03:14 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 03:14 -0800
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "BinnedCorr2Config",
26 "CalcRhoStatistics",
27)
29import logging
30from typing import TYPE_CHECKING, Any, Mapping, cast
32import numpy as np
33import treecorr # type: ignore[import]
34from lsst.pex.config import ChoiceField, Config, ConfigField, Field, FieldValidationError
36from ...interfaces import KeyedData, KeyedDataAction, Vector
37from .calcShapeSize import CalcShapeSize
38from .ellipticity import CalcE, CalcEDiff
39from .vectorActions import FractionalDifference
41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true
42 from treecorr import GGCorrelation, KKCorrelation
44 from ...interfaces import KeyedDataSchema
46_LOG = logging.getLogger(__name__)
49class BinnedCorr2Config(Config):
50 """A Config class that holds some of the parameters supported by treecorr.
52 The fields in this class correspond to the parameters that can be passed to
53 BinnedCorr2 in `treecorr`, which is the base class for all two-point
54 correlation function calculations. The default values set for the fields
55 are identical to the default values set in v4.2 of `treecorr`. The
56 parameters that are excluded in this class are
57 'verbose', 'log_file', 'output_dots', 'rng' and 'pairwise' (deprecated).
58 For details about these options, see the documentation for `treecorr`:
59 https://rmjarvis.github.io/TreeCorr/_build/html/correlation2.html
61 A separate config class is used instead
62 of constructing a `~lsst.pex.config.DictField` so that mixed types can be
63 supported and the config can be validated at the beginning of the
64 execution. The ``toDict`` provides a minimal dictionary that overrides only
65 the default values and excludes the key-value pairs when the item is None.
67 Notes
68 -----
69 This is intended to be used in CalcRhoStatistics class. It only supports
70 some of the fields that are relevant for rho-statistics calculations.
71 """
73 nbins = Field[int]( 73 ↛ exitline 73 didn't jump to the function exit
74 doc=(
75 "How many bins to use. "
76 "(Exactly three of nbins, bin_size, min_sep, max_sep "
77 "are required. If nbins is not given, it will be "
78 "calculated from the values of the other three, "
79 "rounding up to the next highest integer. "
80 "In this case, bin_size will be readjusted to account "
81 "for this rounding up."
82 ),
83 optional=True,
84 check=lambda x: x > 0,
85 )
87 bin_size = Field[float](
88 doc=(
89 "The width of the bins in log(separation). "
90 "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
91 "If bin_size is not given, it will be calculated from the values "
92 "of the other three."
93 ),
94 optional=True,
95 )
97 min_sep = Field[float](
98 doc=(
99 "The minimum separation in units of sep_units, if relevant. "
100 "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
101 "If min_sep is not given, it will be calculated from the values "
102 "of the other three."
103 ),
104 optional=True,
105 )
107 max_sep = Field[float](
108 doc=(
109 "The maximum separation in units of sep_units, if relevant. "
110 "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
111 "If max_sep is not given, it will be calculated from the values "
112 "of the other three."
113 ),
114 optional=True,
115 )
117 sep_units = ChoiceField[str](
118 doc=(
119 "The units to use for the separation values, given as a string. "
120 "This includes both min_sep and max_sep above, as well as the "
121 "units of the output distance values."
122 ),
123 default="radian",
124 optional=True,
125 allowed={units: units for units in ["arcsec", "arcmin", "degree", "hour", "radian"]},
126 )
128 bin_slop = Field[float](
129 doc=(
130 "How much slop to allow in the placement of pairs in the bins. "
131 "If bin_slop = 1, then the bin into which a particular pair is "
132 "placed may be incorrect by at most 1.0 bin widths. "
133 r"If None, use a bin_slop that gives a maximum error of 10% on "
134 "any bin, which has been found to yield good results for most "
135 "applications."
136 ),
137 default=None,
138 optional=True,
139 )
141 precision = Field[int]( 141 ↛ exitline 141 didn't jump to the function exit
142 doc=("The precision to use for the output values. This specifies how many digits to write."),
143 default=4,
144 optional=True,
145 check=lambda x: x > 0,
146 )
148 metric = ChoiceField[str](
149 doc=(
150 "Which metric to use for distance measurements. For details, see "
151 "https://rmjarvis.github.io/TreeCorr/_build/html/metric.html"
152 ),
153 default="Euclidean",
154 optional=True,
155 allowed={
156 "Euclidean": "straight-line Euclidean distance between two points",
157 "FisherRperp": (
158 "the perpendicular component of the distance, "
159 "following the definitions in "
160 "Fisher et al, 1994 (MNRAS, 267, 927)"
161 ),
162 "OldRperp": (
163 "the perpendicular component of the distance using the "
164 "definition of Rperp from TreeCorr v3.x."
165 ),
166 "Rlens": (
167 "Distance from the first object (taken to be a lens) to "
168 "the line connecting Earth and the second object "
169 "(taken to be a lensed source)."
170 ),
171 "Arc": "the true great circle distance for spherical coordinates.",
172 "Periodic": "Like ``Euclidean``, but with periodic boundaries.",
173 },
174 )
176 bin_type = ChoiceField[str](
177 doc="What type of binning should be used?",
178 default="Log",
179 optional=True,
180 allowed={
181 "Log": (
182 "Logarithmic binning in the distance. The bin steps will "
183 "be uniform in log(r) from log(min_sep) .. log(max_sep)."
184 ),
185 "Linear": (
186 "Linear binning in the distance. The bin steps will be "
187 "uniform in r from min_sep .. max_sep."
188 ),
189 "TwoD": (
190 "2-dimensional binning from x = (-max_sep .. max_sep) "
191 "and y = (-max_sep .. max_sep). The bin steps will be "
192 "uniform in both x and y. (i.e. linear in x,y)"
193 ),
194 },
195 )
197 var_method = ChoiceField[str](
198 doc="Which method to use for estimating the variance",
199 default="shot",
200 optional=True,
201 allowed={
202 method: method
203 for method in [
204 "shot",
205 "jackknife",
206 "sample",
207 "bootstrap",
208 "marked_bootstrap",
209 ]
210 },
211 )
213 num_bootstrap = Field[int](
214 doc=("How many bootstrap samples to use for the 'bootstrap' and 'marked_bootstrap' var methods."),
215 default=500,
216 optional=True,
217 )
219 def validate(self):
220 # Docs inherited from base class
221 super().validate()
222 req_params = (self.nbins, self.bin_size, self.min_sep, self.max_sep)
223 num_req_params = sum(param is not None for param in req_params)
224 if num_req_params != 3:
225 msg = (
226 "You must specify exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``"
227 f" in treecorr_config. {num_req_params} parameters were set instead."
228 )
229 raise FieldValidationError(self.__class__.bin_size, self, msg)
231 if self.min_sep is not None and self.max_sep is not None:
232 if self.min_sep > self.max_sep:
233 raise FieldValidationError(self.__class__.min_sep, self, "min_sep must be <= max_sep")
235 def toDict(self):
236 # Docs inherited from base class
237 # We are excluding items that are None due to treecorr limitations.
238 # TODO: DM-38480. This override can be removed after treecorr v4.3
239 # makes its way onto rubin-env.
240 return {k: v for k, v in super().toDict().items() if v is not None}
243class CalcRhoStatistics(KeyedDataAction):
244 r"""Calculate rho statistics
246 Rho statistics refer to a collection of correlation functions involving
247 PSF ellipticity and size residuals. They quantify the contribution from PSF
248 leakage due to errors in PSF modeling to the weak lensing shear correlation
249 functions. The standard rho statistics are indexed from 1 to 5, and
250 this action calculates a sixth rho statistic, indexed 0.
252 Notes
253 -----
254 The exact definitions of rho statistics as defined in [1]_ are given below.
255 In addition to these five, we also compute the auto-correlation function of
256 the fractional size residuals and call it as the :math:`\rho_0( \theta )`.
258 .. math::
260 \rho_1(\theta) &= \langle \delta e^*_{PSF}(x) \delta e_{PSF}(x+\theta) \rangle # noqa: W505
262 \rho_2(\theta) &= \langle e^*_{PSF}(x) \delta e_{PSF}(x+\theta) \rangle
264 \rho_3(\theta) &= \left\langle (e^*_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x))
265 \delta e_{PSF}(x+\theta) \right\rangle
267 \rho_4(\theta) &= \left\langle (\delta e^*_{PSF}(x)
268 (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)) \right\rangle
270 \rho_5(\theta) &= \left\langle (e^*_{PSF}(x)
271 (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)) \right\rangle
273 There is a slightly different version for :math:`\rho_3( \theta )`, used in Melchior et al. (2015) [2]_.
275 .. math::
277 \rho'_3(\theta) &= \left\langle\frac{\delta T_{PSF}}{T_{PSF}}(x)
278 \frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)
279 \right\rangle
282 The definition of ellipticity used in [1]_ correspond to ``epsilon``-type ellipticity, which is typically
283 smaller by a factor of 4 than using ``chi``-type ellipticity.
285 References
286 ----------
287 .. [1] Jarvis, M., Sheldon, E., Zuntz, J., Kacprzak, T., Bridle, S. L., et. al (2016). # noqa: W501
288 The DES Science Verification weak lensing shear catalogues
289 MNRAS, 460, 2245–2281.
290 https://doi.org/10.1093/mnras/stw990;
291 https://arxiv.org/abs/1507.05603
292 .. [2] Melchior, P., et. al (2015)
293 Mass and galaxy distributions of four massive galaxy clusters from Dark Energy Survey
294 Science Verification data
295 MNRAS, 449, no. 3, pp. 2219–2238.
296 https://doi:10.1093/mnras/stv398
297 https://arxiv.org/abs/1405.4285
298 """
300 colRa = Field[str](doc="RA column", default="coord_ra")
302 colDec = Field[str](doc="Dec column", default="coord_dec")
304 colXx = Field[str](doc="The column name to get the xx shape component from.", default="{band}_ixx")
306 colYy = Field[str](doc="The column name to get the yy shape component from.", default="{band}_iyy")
308 colXy = Field[str](doc="The column name to get the xy shape component from.", default="{band}_ixy")
310 colPsfXx = Field[str](
311 doc="The column name to get the PSF xx shape component from.", default="{band}_ixxPSF"
312 )
314 colPsfYy = Field[str](
315 doc="The column name to get the PSF yy shape component from.", default="{band}_iyyPSF"
316 )
318 colPsfXy = Field[str](
319 doc="The column name to get the PSF xy shape component from.", default="{band}_ixyPSF"
320 )
322 # TODO: Replace them with distortion and shear (DM-37325).
323 ellipticityType = ChoiceField[str](
324 doc="The type of ellipticity to calculate",
325 allowed={
326 "chi": "Distortion, measured as (Ixx - Iyy)/(Ixx + Iyy)",
327 "epsilon": ("Shear, measured as (Ixx - Iyy)/(Ixx + Iyy + 2*sqrt(Ixx*Iyy - Ixy**2))"),
328 },
329 default="chi",
330 )
332 sizeType = ChoiceField[str](
333 doc="The type of size to calculate",
334 default="trace",
335 allowed={
336 "trace": "trace radius",
337 "determinant": "determinant radius",
338 },
339 )
341 treecorr = ConfigField[BinnedCorr2Config](
342 doc="TreeCorr configuration",
343 )
345 def setDefaults(self):
346 super().setDefaults()
347 self.treecorr = BinnedCorr2Config()
348 self.treecorr.sep_units = "arcmin"
349 self.treecorr.max_sep = 100.0
351 def getInputSchema(self) -> KeyedDataSchema:
352 return (
353 (self.colRa, Vector),
354 (self.colDec, Vector),
355 (self.colXx, Vector),
356 (self.colYy, Vector),
357 (self.colXy, Vector),
358 (self.colPsfXx, Vector),
359 (self.colPsfYy, Vector),
360 (self.colPsfXy, Vector),
361 )
363 def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
364 calcEMeas = CalcE(
365 colXx=self.colXx,
366 colYy=self.colYy,
367 colXy=self.colXy,
368 ellipticityType=self.ellipticityType,
369 )
370 calcEpsf = CalcE(
371 colXx=self.colPsfXx,
372 colYy=self.colPsfYy,
373 colXy=self.colPsfXy,
374 ellipticityType=self.ellipticityType,
375 )
377 calcEDiff = CalcEDiff(colA=calcEMeas, colB=calcEpsf)
379 calcSizeResidual = FractionalDifference(
380 actionA=CalcShapeSize(
381 colXx=self.colXx,
382 colYy=self.colYy,
383 colXy=self.colXy,
384 sizeType=self.sizeType,
385 ),
386 actionB=CalcShapeSize(
387 colXx=self.colPsfXx,
388 colYy=self.colPsfYy,
389 colXy=self.colPsfXy,
390 sizeType=self.sizeType,
391 ),
392 )
394 # chi-type ellipticity has a shear response of 2, so we need to
395 # divide by 2 so that the rho-stats do not depend on the
396 # ellipticity-type.
397 # Note: For distortion, the responsitivity is 2(1 - e^2_{rms}),
398 # where e_rms is the root mean square ellipticity per component.
399 # This is expected to be small and we ignore it here.
400 # This definition of responsitivity is consistent with the definions
401 # used in the rho-statistics calculations for the HSC shear catalog
402 # papers (Mandelbaum et al. 2018, Li et al., 2022).
403 responsitivity = 2.0 if self.ellipticityType == "chi" else 1.0
405 # Call the actions on the data.
406 eMEAS = calcEMeas(data, **kwargs)
407 if self.ellipticityType == "chi":
408 _LOG.debug("Correction value of responsitivity would be %f", 2 - np.mean(np.abs(eMEAS) ** 2))
409 eMEAS /= responsitivity # type: ignore
410 e1, e2 = np.real(eMEAS), np.imag(eMEAS)
411 eRes = calcEDiff(data, **kwargs)
412 eRes /= responsitivity # type: ignore
413 e1Res, e2Res = np.real(eRes), np.imag(eRes)
414 sizeRes = calcSizeResidual(data, **kwargs)
416 # Scale the sizeRes by ellipticities
417 e1SizeRes = e1 * sizeRes
418 e2SizeRes = e2 * sizeRes
420 # Package the arguments to capture auto-/cross-correlations for the
421 # Rho statistics.
422 args = {
423 0: (sizeRes, None),
424 1: (e1Res, e2Res, None, None),
425 2: (e1, e2, e1Res, e2Res),
426 3: (e1SizeRes, e2SizeRes, None, None),
427 4: (e1Res, e2Res, e1SizeRes, e2SizeRes),
428 5: (e1, e2, e1SizeRes, e2SizeRes),
429 }
431 ra: Vector = data[self.colRa] # type: ignore
432 dec: Vector = data[self.colDec] # type: ignore
434 treecorrKwargs = self.treecorr.toDict()
436 # Pass the appropriate arguments to the correlator and build a dict
437 rhoStats: Mapping[str, treecorr.BinnedCorr2] = {}
438 for rhoIndex in range(1, 6):
439 _LOG.info("Calculating rho-%d", rhoIndex)
440 rhoStats[f"rho{rhoIndex}"] = self._corrSpin2( # type: ignore[index]
441 ra, dec, *(args[rhoIndex]), **treecorrKwargs
442 )
444 _LOG.info("Calculating rho3alt")
445 rhoStats["rho3alt"] = self._corrSpin0(ra, dec, *(args[0]), **treecorrKwargs) # type: ignore[index]
446 return cast(KeyedData, rhoStats)
448 @classmethod
449 def _corrSpin0(
450 cls,
451 ra: Vector,
452 dec: Vector,
453 k1: Vector,
454 k2: Vector | None = None,
455 raUnits: str = "degrees",
456 decUnits: str = "degrees",
457 **treecorrKwargs: Any,
458 ) -> KKCorrelation:
459 """Function to compute correlations between at most two scalar fields.
461 This is used to compute rho3alt statistics, given the appropriate
462 spin-0 (scalar) fields, usually fractional size residuals.
464 Parameters
465 ----------
466 ra : `numpy.array`
467 The right ascension values of entries in the catalog.
468 dec : `numpy.array`
469 The declination values of entries in the catalog.
470 k1 : `numpy.array`
471 The primary scalar field.
472 k2 : `numpy.array`, optional
473 The secondary scalar field.
474 Autocorrelation of the primary field is computed if `None`.
475 raUnits : `str`, optional
476 Unit of the right ascension values. Valid options are
477 "degrees", "arcmin", "arcsec", "hours" or "radians".
478 decUnits : `str`, optional
479 Unit of the declination values. Valid options are
480 "degrees", "arcmin", "arcsec", "hours" or "radians".
481 **treecorrKwargs
482 Keyword arguments to be passed to `treecorr.KKCorrelation`.
484 Returns
485 -------
486 xy : `treecorr.KKCorrelation`
487 A `treecorr.KKCorrelation` object containing the correlation
488 function.
489 """
490 _LOG.debug(
491 "No. of entries: %d. The number of pairs in the resulting KKCorrelation cannot exceed %d",
492 len(ra),
493 len(ra) * (len(ra) - 1) / 2,
494 )
495 xy = treecorr.KKCorrelation(**treecorrKwargs)
496 catA = treecorr.Catalog(ra=ra, dec=dec, k=k1, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
497 if k2 is None:
498 # Calculate the auto-correlation
499 xy.process(catA)
500 else:
501 catB = treecorr.Catalog(ra=ra, dec=dec, k=k2, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
502 # Calculate the cross-correlation
503 xy.process(catA, catB)
505 _LOG.debug("Correlated %d pairs based on the config set.", sum(xy.npairs))
506 return xy
508 @classmethod
509 def _corrSpin2(
510 cls,
511 ra: Vector,
512 dec: Vector,
513 g1a: Vector,
514 g2a: Vector,
515 g1b: Vector | None = None,
516 g2b: Vector | None = None,
517 raUnits: str = "degrees",
518 decUnits: str = "degrees",
519 **treecorrKwargs: Any,
520 ) -> GGCorrelation:
521 """Function to compute correlations between shear-like fields.
523 This is used to compute Rho statistics, given the appropriate spin-2
524 (shear-like) fields.
526 Parameters
527 ----------
528 ra : `numpy.array`
529 The right ascension values of entries in the catalog.
530 dec : `numpy.array`
531 The declination values of entries in the catalog.
532 g1a : `numpy.array`
533 The first component of the primary shear-like field.
534 g2a : `numpy.array`
535 The second component of the primary shear-like field.
536 g1b : `numpy.array`, optional
537 The first component of the secondary shear-like field.
538 Autocorrelation of the primary field is computed if `None`.
539 g2b : `numpy.array`, optional
540 The second component of the secondary shear-like field.
541 Autocorrelation of the primary field is computed if `None`.
542 raUnits : `str`, optional
543 Unit of the right ascension values. Valid options are
544 "degrees", "arcmin", "arcsec", "hours" or "radians".
545 decUnits : `str`, optional
546 Unit of the declination values. Valid options are
547 "degrees", "arcmin", "arcsec", "hours" or "radians".
548 **treecorrKwargs
549 Keyword arguments to be passed to `treecorr.GGCorrelation`.
551 Returns
552 -------
553 xy : `treecorr.GGCorrelation`
554 A `treecorr.GGCorrelation` object containing the correlation
555 function.
556 """
557 _LOG.debug(
558 "No. of entries: %d. The number of pairs in the resulting GGCorrelation cannot exceed %d",
559 len(ra),
560 len(ra) * (len(ra) - 1) / 2,
561 )
562 xy = treecorr.GGCorrelation(**treecorrKwargs)
563 catA = treecorr.Catalog(
564 ra=ra, dec=dec, g1=g1a, g2=g2a, ra_units=raUnits, dec_units=decUnits, logger=_LOG
565 )
566 if g1b is None or g2b is None:
567 # Calculate the auto-correlation
568 xy.process(catA)
569 else:
570 catB = treecorr.Catalog(
571 ra=ra, dec=dec, g1=g1b, g2=g2b, ra_units=raUnits, dec_units=decUnits, logger=_LOG
572 )
573 # Calculate the cross-correlation
574 xy.process(catA, catB)
576 _LOG.debug("Correlated %d pairs based on the config set.", sum(xy.npairs))
577 return xy