Coverage for python/lsst/analysis/tools/actions/vector/calcRhoStatistics.py: 41%

106 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-17 03:59 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Names exported by ``from <this module> import *``: the treecorr config
# class and the rho-statistics action defined below.
__all__ = (
    "BinnedCorr2Config",
    "CalcRhoStatistics",
)

28 

29import logging 

30from typing import TYPE_CHECKING, Any, Mapping, cast 

31 

32import numpy as np 

33import treecorr # type: ignore[import] 

34from lsst.pex.config import ChoiceField, Config, ConfigField, Field, FieldValidationError 

35 

36from ...interfaces import KeyedData, KeyedDataAction, Vector 

37from .calcMomentSize import CalcMomentSize 

38from .ellipticity import CalcE, CalcEDiff 

39from .mathActions import FractionalDifference 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from treecorr import GGCorrelation, KKCorrelation 

43 

44 from ...interfaces import KeyedDataSchema 

45 

# Module-level logger. It is also handed to every ``treecorr.Catalog`` built
# in this module (``logger=_LOG``) so that treecorr reports through it too.
_LOG = logging.getLogger(__name__)

47 

48 

class BinnedCorr2Config(Config):
    """A Config class that holds some of the parameters supported by treecorr.

    The fields in this class correspond to the parameters that can be passed to
    BinnedCorr2 in `treecorr`, which is the base class for all two-point
    correlation function calculations. The default values set for the fields
    are identical to the default values set in v4.2 of `treecorr`. The
    parameters that are excluded in this class are
    'verbose', 'log_file', 'output_dots', 'rng' and 'pairwise' (deprecated).
    For details about these options, see the documentation for `treecorr`:
    https://rmjarvis.github.io/TreeCorr/_build/html/correlation2.html

    A separate config class is used instead
    of constructing a `~lsst.pex.config.DictField` so that mixed types can be
    supported and the config can be validated at the beginning of the
    execution.

    Notes
    -----
    This is intended to be used in CalcRhoStatistics class. It only supports
    some of the fields that are relevant for rho-statistics calculations.

    Exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep`` must
    be set; this is enforced in `validate`.
    """

    nbins = Field[int](
        doc=(
            "How many bins to use. "
            "(Exactly three of nbins, bin_size, min_sep, max_sep "
            "are required. If nbins is not given, it will be "
            "calculated from the values of the other three, "
            "rounding up to the next highest integer. "
            "In this case, bin_size will be readjusted to account "
            "for this rounding up.)"
        ),
        optional=True,
        check=lambda x: x > 0,
    )

    bin_size = Field[float](
        doc=(
            "The width of the bins in log(separation). "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If bin_size is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    min_sep = Field[float](
        doc=(
            "The minimum separation in units of sep_units, if relevant. "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If min_sep is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    max_sep = Field[float](
        doc=(
            "The maximum separation in units of sep_units, if relevant. "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If max_sep is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    sep_units = ChoiceField[str](
        doc=(
            "The units to use for the separation values, given as a string. "
            "This includes both min_sep and max_sep above, as well as the "
            "units of the output distance values."
        ),
        default="radian",
        optional=True,
        allowed={units: units for units in ["arcsec", "arcmin", "degree", "hour", "radian"]},
    )

    bin_slop = Field[float](
        doc=(
            "How much slop to allow in the placement of pairs in the bins. "
            "If bin_slop = 1, then the bin into which a particular pair is "
            "placed may be incorrect by at most 1.0 bin widths. "
            r"If None, use a bin_slop that gives a maximum error of 10% on "
            "any bin, which has been found to yield good results for most "
            "applications."
        ),
        default=None,
        optional=True,
    )

    precision = Field[int](
        doc=("The precision to use for the output values. This specifies how many digits to write."),
        default=4,
        optional=True,
        check=lambda x: x > 0,
    )

    metric = ChoiceField[str](
        doc=(
            "Which metric to use for distance measurements. For details, see "
            "https://rmjarvis.github.io/TreeCorr/_build/html/metric.html"
        ),
        default="Euclidean",
        optional=True,
        allowed={
            "Euclidean": "straight-line Euclidean distance between two points",
            "FisherRperp": (
                "the perpendicular component of the distance, "
                "following the definitions in "
                "Fisher et al, 1994 (MNRAS, 267, 927)"
            ),
            "OldRperp": (
                "the perpendicular component of the distance using the "
                "definition of Rperp from TreeCorr v3.x."
            ),
            "Rlens": (
                "Distance from the first object (taken to be a lens) to "
                "the line connecting Earth and the second object "
                "(taken to be a lensed source)."
            ),
            "Arc": "the true great circle distance for spherical coordinates.",
            "Periodic": "Like ``Euclidean``, but with periodic boundaries.",
        },
    )

    bin_type = ChoiceField[str](
        doc="What type of binning should be used?",
        default="Log",
        optional=True,
        allowed={
            "Log": (
                "Logarithmic binning in the distance. The bin steps will "
                "be uniform in log(r) from log(min_sep) .. log(max_sep)."
            ),
            "Linear": (
                "Linear binning in the distance. The bin steps will be "
                "uniform in r from min_sep .. max_sep."
            ),
            "TwoD": (
                "2-dimensional binning from x = (-max_sep .. max_sep) "
                "and y = (-max_sep .. max_sep). The bin steps will be "
                "uniform in both x and y. (i.e. linear in x,y)"
            ),
        },
    )

    var_method = ChoiceField[str](
        doc="Which method to use for estimating the variance",
        default="shot",
        optional=True,
        allowed={
            method: method
            for method in [
                "shot",
                "jackknife",
                "sample",
                "bootstrap",
                "marked_bootstrap",
            ]
        },
    )

    num_bootstrap = Field[int](
        doc=("How many bootstrap samples to use for the 'bootstrap' and 'marked_bootstrap' var methods."),
        default=500,
        optional=True,
    )

    def validate(self):
        """Validate the config, including the binning constraints.

        In addition to the base-class validation, this checks that exactly
        three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep`` are set
        and that ``min_sep`` does not exceed ``max_sep`` when both are given.

        Raises
        ------
        lsst.pex.config.FieldValidationError
            Raised if the number of binning parameters that are set is not
            three, or if ``min_sep`` is greater than ``max_sep``.
        """
        super().validate()
        req_params = (self.nbins, self.bin_size, self.min_sep, self.max_sep)
        # A field counts as "set" when it is not None; a value of 0 still
        # counts, hence the explicit identity test instead of truthiness.
        num_req_params = sum(param is not None for param in req_params)
        if num_req_params != 3:
            msg = (
                "You must specify exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``"
                f" in treecorr_config. {num_req_params} parameters were set instead."
            )
            raise FieldValidationError(self.__class__.bin_size, self, msg)

        # The ordering can only be checked when both bounds were supplied;
        # otherwise one of them is left to be derived from the other fields.
        if self.min_sep is not None and self.max_sep is not None and self.min_sep > self.max_sep:
            raise FieldValidationError(self.__class__.min_sep, self, "min_sep must be <= max_sep")

233 

234 

class CalcRhoStatistics(KeyedDataAction):
    r"""Calculate rho statistics.

    Rho statistics refer to a collection of correlation functions involving
    PSF ellipticity and size residuals. They quantify the contribution from PSF
    leakage due to errors in PSF modeling to the weak lensing shear correlation
    functions.

    .. _rho_definitions:

    The exact definitions of rho statistics as defined in [1]_ are given below.

    .. math::

       \rho_1(\theta) &= \left\langle
           \delta e^*_{PSF}(x)
           \delta e_{PSF}(x+\theta)
           \right\rangle

       \rho_2(\theta) &= \left\langle
           e^*_{PSF}(x)
           \delta e_{PSF}(x+\theta)
           \right\rangle

       \rho_3(\theta) &= \left\langle
           (e^*_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle

       \rho_4(\theta) &= \left\langle
           \delta e^*_{PSF}(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle

       \rho_5(\theta) &= \left\langle
           e^*_{PSF}(x)
           (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
           \right\rangle


    In addition to these five, we also compute the auto-correlation function of
    the fractional size residuals and call it :math:`\rho'_3(\theta)`,
    as referred to in Melchior et al. (2015) [2]_.

    .. math::

       \rho'_3(\theta) = \left\langle\frac{\delta T_{PSF}}{T_{PSF}}(x)
                                      \frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)
                         \right\rangle


    The definition of ellipticity used in [1]_ corresponds to shear-type,
    which is typically smaller by a factor of 4 than using distortion-type.

    References
    ----------

    .. [1] Jarvis, M., Sheldon, E., Zuntz, J., Kacprzak, T., Bridle, S. L.,
           et. al (2016).
           The DES Science Verification weak lensing shear catalogues
           MNRAS, 460, 2245–2281.
           https://doi.org/10.1093/mnras/stw990;
           https://arxiv.org/abs/1507.05603
    .. [2] Melchior, P., et. al (2015)
           Mass and galaxy distributions of four massive galaxy clusters from
           Dark Energy Survey Science Verification data
           MNRAS, 449, no. 3, pp. 2219–2238.
           https://doi.org/10.1093/mnras/stv398
           https://arxiv.org/abs/1405.4285
    """

    colRa = Field[str](doc="RA column", default="coord_ra")

    colDec = Field[str](doc="Dec column", default="coord_dec")

    colXx = Field[str](doc="The column name to get the xx shape component from.", default="{band}_ixx")

    colYy = Field[str](doc="The column name to get the yy shape component from.", default="{band}_iyy")

    colXy = Field[str](doc="The column name to get the xy shape component from.", default="{band}_ixy")

    colPsfXx = Field[str](
        doc="The column name to get the PSF xx shape component from.", default="{band}_ixxPSF"
    )

    colPsfYy = Field[str](
        doc="The column name to get the PSF yy shape component from.", default="{band}_iyyPSF"
    )

    colPsfXy = Field[str](
        doc="The column name to get the PSF xy shape component from.", default="{band}_ixyPSF"
    )

    ellipticityType = ChoiceField[str](
        doc="The type of ellipticity to calculate",
        optional=False,
        allowed={
            "distortion": r"Distortion, measured as :math:`(I_{xx}-I_{yy})/(I_{xx}+I_{yy})`",
            "shear": (
                r"Shear, measured as :math:`(I_{xx}-I_{yy})/(I_{xx}+I_{yy}+2\sqrt{I_{xx}I_{yy}-I_{xy}^2})`"
            ),
        },
        default="distortion",
    )

    sizeType = ChoiceField[str](
        doc="The type of size to calculate",
        default="trace",
        allowed={
            "trace": "trace radius",
            "determinant": "determinant radius",
        },
    )

    # This class attribute shares its name with the imported ``treecorr``
    # module. Class-level names are not visible from method bodies, so
    # ``treecorr`` inside the methods below still refers to the module, while
    # ``self.treecorr`` refers to this config.
    treecorr = ConfigField[BinnedCorr2Config](
        doc="TreeCorr configuration",
    )

    def setDefaults(self):
        """Set default treecorr options: arcminute units, max_sep of 100."""
        super().setDefaults()
        self.treecorr = BinnedCorr2Config()
        self.treecorr.sep_units = "arcmin"
        self.treecorr.max_sep = 100.0

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (column name, type) pairs this action reads.

        Returns
        -------
        schema : `KeyedDataSchema`
            The coordinate columns followed by the measured and PSF second
            moment columns, each declared as a `Vector`.
        """
        return (
            (self.colRa, Vector),
            (self.colDec, Vector),
            (self.colXx, Vector),
            (self.colYy, Vector),
            (self.colXy, Vector),
            (self.colPsfXx, Vector),
            (self.colPsfYy, Vector),
            (self.colPsfXy, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Compute the rho statistics for the given data.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the columns listed by `getInputSchema`.
        **kwargs
            Additional keyword arguments, forwarded unchanged to the
            ellipticity and size actions that are evaluated on ``data``.

        Returns
        -------
        rhoStats : `KeyedData`
            Mapping with keys ``"rho1"`` through ``"rho5"`` (results of
            `_corrSpin2`) and ``"rho3alt"`` (result of `_corrSpin0`).
        """
        calcEMeas = CalcE(
            colXx=self.colXx,
            colYy=self.colYy,
            colXy=self.colXy,
            ellipticityType=self.ellipticityType,
        )
        calcEpsf = CalcE(
            colXx=self.colPsfXx,
            colYy=self.colPsfYy,
            colXy=self.colPsfXy,
            ellipticityType=self.ellipticityType,
        )

        calcEDiff = CalcEDiff(colA=calcEMeas, colB=calcEpsf)

        calcSizeResidual = FractionalDifference(
            actionA=CalcMomentSize(
                colXx=self.colXx,
                colYy=self.colYy,
                colXy=self.colXy,
                sizeType=self.sizeType,
            ),
            actionB=CalcMomentSize(
                colXx=self.colPsfXx,
                colYy=self.colPsfYy,
                colXy=self.colPsfXy,
                sizeType=self.sizeType,
            ),
        )

        # distortion-type ellipticity has a shear response of 2, so we need to
        # divide by 2 so that the rho-stats do not depend on the
        # ellipticity-type.
        # Note: For distortion, the responsivity is 2(1 - e^2_{rms}),
        # where e_rms is the root mean square ellipticity per component.
        # This is expected to be small and we ignore it here.
        # This definition of responsivity is consistent with the definitions
        # used in the rho-statistics calculations for the HSC shear catalog
        # papers (Mandelbaum et al. 2018, Li et al., 2022).
        responsivity = 2.0 if self.ellipticityType == "distortion" else 1.0

        # Call the actions on the data.
        eMEAS = calcEMeas(data, **kwargs)
        if self.ellipticityType == "distortion":
            # mean(|e|^2) equals 2 * e_rms^2, so this logs 2(1 - e_rms^2).
            _LOG.debug("Correction value of responsitivity would be %f", 2 - np.mean(np.abs(eMEAS) ** 2))
        eMEAS /= responsivity  # type: ignore
        e1, e2 = np.real(eMEAS), np.imag(eMEAS)
        eRes = calcEDiff(data, **kwargs)
        eRes /= responsivity  # type: ignore
        e1Res, e2Res = np.real(eRes), np.imag(eRes)
        sizeRes = calcSizeResidual(data, **kwargs)

        # Scale the sizeRes by ellipticities
        e1SizeRes = e1 * sizeRes
        e2SizeRes = e2 * sizeRes

        # Package the arguments to capture auto-/cross-correlations for the
        # Rho statistics. Key 0 holds the scalar field for rho3alt; keys 1-5
        # hold (g1a, g2a, g1b, g2b) for the corresponding rho statistic, with
        # ``None`` in the second pair requesting an auto-correlation.
        args = {
            0: (sizeRes, None),
            1: (e1Res, e2Res, None, None),
            2: (e1, e2, e1Res, e2Res),
            3: (e1SizeRes, e2SizeRes, None, None),
            4: (e1Res, e2Res, e1SizeRes, e2SizeRes),
            5: (e1, e2, e1SizeRes, e2SizeRes),
        }

        # No units are passed below, so the helpers' default coordinate units
        # ("degrees") apply to these columns.
        ra: Vector = data[self.colRa]  # type: ignore
        dec: Vector = data[self.colDec]  # type: ignore

        treecorrKwargs = self.treecorr.toDict()

        # Pass the appropriate arguments to the correlator and build a dict
        rhoStats: Mapping[str, treecorr.BinnedCorr2] = {}
        for rhoIndex in range(1, 6):
            _LOG.info("Calculating rho-%d", rhoIndex)
            rhoStats[f"rho{rhoIndex}"] = self._corrSpin2(  # type: ignore[index]
                ra, dec, *(args[rhoIndex]), **treecorrKwargs
            )

        _LOG.info("Calculating rho3alt")
        rhoStats["rho3alt"] = self._corrSpin0(ra, dec, *(args[0]), **treecorrKwargs)  # type: ignore[index]
        return cast(KeyedData, rhoStats)

    @classmethod
    def _corrSpin0(
        cls,
        ra: Vector,
        dec: Vector,
        k1: Vector,
        k2: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        **treecorrKwargs: Any,
    ) -> KKCorrelation:
        """Function to compute correlations between at most two scalar fields.

        This is used to compute rho3alt statistics, given the appropriate
        spin-0 (scalar) fields, usually fractional size residuals.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        k1 : `numpy.array`
            The primary scalar field.
        k2 : `numpy.array`, optional
            The secondary scalar field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        **treecorrKwargs
            Keyword arguments to be passed to `treecorr.KKCorrelation`.

        Returns
        -------
        xy : `treecorr.KKCorrelation`
            A `treecorr.KKCorrelation` object containing the correlation
            function.
        """
        numEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting KKCorrelation cannot exceed %d",
            numEntries,
            # Integer division keeps the bound an exact int for ``%d``.
            numEntries * (numEntries - 1) // 2,
        )
        xy = treecorr.KKCorrelation(**treecorrKwargs)
        catA = treecorr.Catalog(ra=ra, dec=dec, k=k1, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
        if k2 is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            catB = treecorr.Catalog(ra=ra, dec=dec, k=k2, ra_units=raUnits, dec_units=decUnits, logger=_LOG)
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over every axis, so this yields a scalar even if
        # npairs is multi-dimensional (presumably the case for "TwoD" binning
        # -- confirm against the TreeCorr docs); the builtin sum would return
        # an array there, which ``%d`` cannot format.
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy

    @classmethod
    def _corrSpin2(
        cls,
        ra: Vector,
        dec: Vector,
        g1a: Vector,
        g2a: Vector,
        g1b: Vector | None = None,
        g2b: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        **treecorrKwargs: Any,
    ) -> GGCorrelation:
        """Function to compute correlations between shear-like fields.

        This is used to compute Rho statistics, given the appropriate spin-2
        (shear-like) fields.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        g1a : `numpy.array`
            The first component of the primary shear-like field.
        g2a : `numpy.array`
            The second component of the primary shear-like field.
        g1b : `numpy.array`, optional
            The first component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        g2b : `numpy.array`, optional
            The second component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        **treecorrKwargs
            Keyword arguments to be passed to `treecorr.GGCorrelation`.

        Returns
        -------
        xy : `treecorr.GGCorrelation`
            A `treecorr.GGCorrelation` object containing the correlation
            function.

        Notes
        -----
        If only one of ``g1b`` and ``g2b`` is provided, it is ignored and the
        auto-correlation of the primary field is computed.
        """
        numEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting GGCorrelation cannot exceed %d",
            numEntries,
            # Integer division keeps the bound an exact int for ``%d``.
            numEntries * (numEntries - 1) // 2,
        )
        xy = treecorr.GGCorrelation(**treecorrKwargs)
        catA = treecorr.Catalog(
            ra=ra, dec=dec, g1=g1a, g2=g2a, ra_units=raUnits, dec_units=decUnits, logger=_LOG
        )
        if g1b is None or g2b is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            catB = treecorr.Catalog(
                ra=ra, dec=dec, g1=g1b, g2=g2b, ra_units=raUnits, dec_units=decUnits, logger=_LOG
            )
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over every axis, so this yields a scalar even if
        # npairs is multi-dimensional (presumably the case for "TwoD" binning
        # -- confirm against the TreeCorr docs); the builtin sum would return
        # an array there, which ``%d`` cannot format.
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy