Coverage for python/lsst/analysis/tools/actions/vector/calcRhoStatistics.py: 41%

110 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-16 04:37 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "CalcRhoStatistics", 

26 "TreecorrConfig", 

27) 

28 

29import logging 

30from typing import TYPE_CHECKING, Any, Mapping, cast 

31 

32import numpy as np 

33import treecorr # type: ignore[import] 

34from lsst.pex.config import ChoiceField, Config, ConfigField, Field, FieldValidationError 

35 

36from ...interfaces import KeyedData, KeyedDataAction, Vector 

37from .calcMomentSize import CalcMomentSize 

38from .ellipticity import CalcE, CalcEDiff 

39from .mathActions import FractionalDifference 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from treecorr import GGCorrelation, KKCorrelation 

43 

44 from ...interfaces import KeyedDataSchema 

45 

46_LOG = logging.getLogger(__name__) 

47 

48 

class TreecorrConfig(Config):
    """A Config class that holds some of the parameters supported by treecorr.

    The fields in this class correspond to the parameters that can be passed to
    any calls to `treecorr` methods, including catalog creation and two-point
    correlation function calculations. The default values set for the fields
    are identical to the default values set in v4.2 of `treecorr`.

    A separate config class is used instead
    of constructing a `~lsst.pex.config.DictField` so that mixed types can be
    supported and the config can be validated at the beginning of the
    execution.

    Notes
    -----
    This is intended to be used in CalcRhoStatistics class. It only supports
    some of the fields that are relevant for rho-statistics calculations.

    Exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``
    must be set; this is enforced in `validate`.
    """

    nbins = Field[int](
        doc=(
            "How many bins to use. "
            "(Exactly three of nbins, bin_size, min_sep, max_sep "
            "are required. If nbins is not given, it will be "
            "calculated from the values of the other three, "
            "rounding up to the next highest integer. "
            "In this case, bin_size will be readjusted to account "
            "for this rounding up.)"
        ),
        optional=True,
        check=lambda x: x > 0,
    )

    bin_size = Field[float](
        doc=(
            "The width of the bins in log(separation). "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If bin_size is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    min_sep = Field[float](
        doc=(
            "The minimum separation in units of sep_units, if relevant. "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If min_sep is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    max_sep = Field[float](
        doc=(
            "The maximum separation in units of sep_units, if relevant. "
            "Exactly three of nbins, bin_size, min_sep, max_sep are required. "
            "If max_sep is not given, it will be calculated from the values "
            "of the other three."
        ),
        optional=True,
    )

    sep_units = ChoiceField[str](
        doc=(
            "The units to use for the separation values, given as a string. "
            "This includes both min_sep and max_sep above, as well as the "
            "units of the output distance values."
        ),
        default="radian",
        optional=True,
        allowed={units: units for units in ["arcsec", "arcmin", "degree", "hour", "radian"]},
    )

    bin_slop = Field[float](
        doc=(
            "How much slop to allow in the placement of pairs in the bins. "
            "If bin_slop = 1, then the bin into which a particular pair is "
            "placed may be incorrect by at most 1.0 bin widths. "
            r"If None, use a bin_slop that gives a maximum error of 10% on "
            "any bin, which has been found to yield good results for most "
            "applications."
        ),
        default=None,
        optional=True,
    )

    precision = Field[int](
        doc=("The precision to use for the output values. This specifies how many digits to write."),
        default=4,
        optional=True,
        check=lambda x: x > 0,
    )

    metric = ChoiceField[str](
        doc=(
            "Which metric to use for distance measurements. For details, see "
            "https://rmjarvis.github.io/TreeCorr/_build/html/metric.html"
        ),
        default="Euclidean",
        optional=True,
        allowed={
            "Euclidean": "straight-line Euclidean distance between two points",
            "FisherRperp": (
                "the perpendicular component of the distance, "
                "following the definitions in "
                "Fisher et al, 1994 (MNRAS, 267, 927)"
            ),
            "OldRperp": (
                "the perpendicular component of the distance using the "
                "definition of Rperp from TreeCorr v3.x."
            ),
            "Rlens": (
                "Distance from the first object (taken to be a lens) to "
                "the line connecting Earth and the second object "
                "(taken to be a lensed source)."
            ),
            "Arc": "the true great circle distance for spherical coordinates.",
            "Periodic": "Like ``Euclidean``, but with periodic boundaries.",
        },
    )

    bin_type = ChoiceField[str](
        doc="What type of binning should be used?",
        default="Log",
        optional=True,
        allowed={
            "Log": (
                "Logarithmic binning in the distance. The bin steps will "
                "be uniform in log(r) from log(min_sep) .. log(max_sep)."
            ),
            "Linear": (
                "Linear binning in the distance. The bin steps will be "
                "uniform in r from min_sep .. max_sep."
            ),
            "TwoD": (
                "2-dimensional binning from x = (-max_sep .. max_sep) "
                "and y = (-max_sep .. max_sep). The bin steps will be "
                "uniform in both x and y. (i.e. linear in x,y)"
            ),
        },
    )

    var_method = ChoiceField[str](
        doc="Which method to use for estimating the variance",
        default="shot",
        optional=True,
        allowed={
            method: method
            for method in [
                "shot",
                "jackknife",
                "sample",
                "bootstrap",
                "marked_bootstrap",
            ]
        },
    )

    # A non-positive patch count is rejected here, at config validation,
    # in the same way as ``nbins`` and ``precision``.
    npatch = Field[int](
        doc="How many patches to split the catalog into for the purpose of "
        "jackknife variance or other options that involve running via "
        "patches (bootstrap, marked_bootstrap etc.)",
        default=1,
        optional=True,
        check=lambda x: x > 0,
    )

    # Same reasoning as for ``npatch``: reject non-positive values up front.
    num_bootstrap = Field[int](
        doc=("How many bootstrap samples to use for the 'bootstrap' and 'marked_bootstrap' var methods."),
        default=500,
        optional=True,
        check=lambda x: x > 0,
    )

    rng_seed = Field[int](
        doc="Value to seed the treecorr random number generator with. Used to generate patches.",
        default=13579,
    )

    def validate(self):
        """Validate the binning specification.

        Runs the base-class validation (including the per-field ``check``
        callbacks) and then enforces two cross-field constraints.

        Raises
        ------
        FieldValidationError
            Raised if the number of parameters set among ``nbins``,
            ``bin_size``, ``min_sep`` and ``max_sep`` is not exactly three,
            or if both ``min_sep`` and ``max_sep`` are set and ``min_sep``
            is larger than ``max_sep``.
        """
        super().validate()

        # The four binning parameters are mutually dependent: any three of
        # them determine the fourth, so exactly three must be provided.
        req_params = (self.nbins, self.bin_size, self.min_sep, self.max_sep)
        num_req_params = sum(param is not None for param in req_params)
        if num_req_params != 3:
            msg = (
                "You must specify exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``"
                f" in treecorr_config. {num_req_params} parameters were set instead."
            )
            raise FieldValidationError(self.__class__.bin_size, self, msg)

        # The ordering check only makes sense when both ends are given.
        if self.min_sep is not None and self.max_sep is not None and self.min_sep > self.max_sep:
            raise FieldValidationError(self.__class__.min_sep, self, "min_sep must be <= max_sep")

242 

243 

class CalcRhoStatistics(KeyedDataAction):
    r"""Calculate rho statistics.

    Rho statistics refer to a collection of correlation functions involving
    PSF ellipticity and size residuals. They quantify the contribution from PSF
    leakage due to errors in PSF modeling to the weak lensing shear correlation
    functions.

    .. _rho_definitions:

    The exact definitions of rho statistics as defined in [1]_ are given below.

    .. math::

        \rho_1(\theta) &= \left\langle
            \delta e^*_{PSF}(x)
            \delta e_{PSF}(x+\theta)
            \right\rangle

        \rho_2(\theta) &= \left\langle
            e^*_{PSF}(x)
            \delta e_{PSF}(x+\theta)
            \right\rangle

        \rho_3(\theta) &= \left\langle
            (e^*_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x))
            (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
            \right\rangle

        \rho_4(\theta) &= \left\langle
            \delta e^*_{PSF}(x)
            (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
            \right\rangle

        \rho_5(\theta) &= \left\langle
            e^*_{PSF}(x)
            (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}})(x+\theta)
            \right\rangle


    In addition to these five, we also compute the auto-correlation function of
    the fractional size residuals and call it :math:`\rho'_3( \theta )`,
    as referred to in Melchior et al. (2015) [2]_.

    .. math::

        \rho'_3(\theta) = \left\langle\frac{\delta T_{PSF}}{T_{PSF}}(x)
                                       \frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)
                          \right\rangle


    The definition of ellipticity used in [1]_ corresponds to shear-type,
    which is typically smaller by a factor of 4 than using distortion-type.

    Notes
    -----
    Calling the action returns a mapping with the keys ``rho1`` ... ``rho5``
    (each the result of `_corrSpin2`) and ``rho3alt`` (the result of
    `_corrSpin0`).

    References
    ----------

    .. [1] Jarvis, M., Sheldon, E., Zuntz, J., Kacprzak, T., Bridle, S. L.,
           et. al (2016).
           The DES Science Verification weak lensing shear catalogues
           MNRAS, 460, 2245–2281.
           https://doi.org/10.1093/mnras/stw990;
           https://arxiv.org/abs/1507.05603
    .. [2] Melchior, P., et. al (2015)
           Mass and galaxy distributions of four massive galaxy clusters from
           Dark Energy Survey Science Verification data
           MNRAS, 449, no. 3, pp. 2219–2238.
           https://doi.org/10.1093/mnras/stv398;
           https://arxiv.org/abs/1405.4285
    """

    colRa = Field[str](doc="RA column", default="coord_ra")

    colDec = Field[str](doc="Dec column", default="coord_dec")

    colXx = Field[str](doc="The column name to get the xx shape component from.", default="{band}_ixx")

    colYy = Field[str](doc="The column name to get the yy shape component from.", default="{band}_iyy")

    colXy = Field[str](doc="The column name to get the xy shape component from.", default="{band}_ixy")

    colPsfXx = Field[str](
        doc="The column name to get the PSF xx shape component from.", default="{band}_ixxPSF"
    )

    colPsfYy = Field[str](
        doc="The column name to get the PSF yy shape component from.", default="{band}_iyyPSF"
    )

    colPsfXy = Field[str](
        doc="The column name to get the PSF xy shape component from.", default="{band}_ixyPSF"
    )

    ellipticityType = ChoiceField[str](
        doc="The type of ellipticity to calculate",
        optional=False,
        allowed={
            "distortion": r"Distortion, measured as :math:`(I_{xx}-I_{yy})/(I_{xx}+I_{yy})`",
            "shear": (
                r"Shear, measured as :math:`(I_{xx}-I_{yy})/(I_{xx}+I_{yy}+2\sqrt{I_{xx}I_{yy}-I_{xy}^2})`"
            ),
        },
        default="distortion",
    )

    sizeType = ChoiceField[str](
        doc="The type of size to calculate",
        default="trace",
        allowed={
            "trace": "trace radius",
            "determinant": "determinant radius",
        },
    )

    # NOTE: inside the class body this attribute shadows the ``treecorr``
    # module name; the methods below still resolve ``treecorr`` to the module
    # because method bodies look names up in the module namespace.
    treecorr = ConfigField[TreecorrConfig](
        doc="TreeCorr configuration",
    )

    def setDefaults(self):
        """Set the default separation units and maximum separation.

        Only ``sep_units`` and ``max_sep`` are given defaults here; the
        remaining binning parameters are left unset.
        """
        super().setDefaults()
        self.treecorr = TreecorrConfig()
        self.treecorr.sep_units = "arcmin"
        self.treecorr.max_sep = 100.0

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the columns read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            ``(column name, Vector)`` pairs for the sky coordinates and the
            second moments of the source and of the PSF.
        """
        return (
            (self.colRa, Vector),
            (self.colDec, Vector),
            (self.colXx, Vector),
            (self.colYy, Vector),
            (self.colXy, Vector),
            (self.colPsfXx, Vector),
            (self.colPsfYy, Vector),
            (self.colPsfXy, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
        """Compute the rho statistics for the given data.

        Parameters
        ----------
        data : `KeyedData`
            Input data containing the columns listed by `getInputSchema`.
        **kwargs
            Forwarded unchanged to the ellipticity and size actions.

        Returns
        -------
        rhoStats : `KeyedData`
            Mapping with keys ``rho1`` to ``rho5`` and ``rho3alt`` whose
            values are the correlation objects returned by `_corrSpin2` and
            `_corrSpin0` respectively.
        """
        calcEMeas = CalcE(
            colXx=self.colXx,
            colYy=self.colYy,
            colXy=self.colXy,
            ellipticityType=self.ellipticityType,
        )
        calcEpsf = CalcE(
            colXx=self.colPsfXx,
            colYy=self.colPsfYy,
            colXy=self.colPsfXy,
            ellipticityType=self.ellipticityType,
        )

        calcEDiff = CalcEDiff(colA=calcEMeas, colB=calcEpsf)

        calcSizeResidual = FractionalDifference(
            actionA=CalcMomentSize(
                colXx=self.colXx,
                colYy=self.colYy,
                colXy=self.colXy,
                sizeType=self.sizeType,
            ),
            actionB=CalcMomentSize(
                colXx=self.colPsfXx,
                colYy=self.colPsfYy,
                colXy=self.colPsfXy,
                sizeType=self.sizeType,
            ),
        )

        # distortion-type ellipticity has a shear response of 2, so we need to
        # divide by 2 so that the rho-stats do not depend on the
        # ellipticity-type.
        # Note: For distortion, the responsivity is 2(1 - e^2_{rms}),
        # where e_rms is the root mean square ellipticity per component.
        # This is expected to be small and we ignore it here.
        # This definition of responsivity is consistent with the definitions
        # used in the rho-statistics calculations for the HSC shear catalog
        # papers (Mandelbaum et al. 2018, Li et al., 2022).
        responsivity = 2.0 if self.ellipticityType == "distortion" else 1.0

        # Call the actions on the data.
        eMEAS = calcEMeas(data, **kwargs)
        if self.ellipticityType == "distortion":
            # mean(|e|^2) equals 2 e_rms^2, so this logs 2(1 - e_rms^2).
            _LOG.debug("Correction value of responsitivity would be %f", 2 - np.mean(np.abs(eMEAS) ** 2))
        eMEAS /= responsivity  # type: ignore
        e1, e2 = np.real(eMEAS), np.imag(eMEAS)
        eRes = calcEDiff(data, **kwargs)
        eRes /= responsivity  # type: ignore
        e1Res, e2Res = np.real(eRes), np.imag(eRes)
        sizeRes = calcSizeResidual(data, **kwargs)

        # Scale the sizeRes by ellipticities
        e1SizeRes = e1 * sizeRes
        e2SizeRes = e2 * sizeRes

        # Package the arguments to capture auto-/cross-correlations for the
        # Rho statistics. A trailing ``None`` pair requests an
        # auto-correlation of the leading field. Index 0 is the scalar
        # rho3alt statistic.
        args = {
            0: (sizeRes, None),
            1: (e1Res, e2Res, None, None),
            2: (e1, e2, e1Res, e2Res),
            3: (e1SizeRes, e2SizeRes, None, None),
            4: (e1Res, e2Res, e1SizeRes, e2SizeRes),
            5: (e1, e2, e1SizeRes, e2SizeRes),
        }

        ra: Vector = data[self.colRa]  # type: ignore
        dec: Vector = data[self.colDec]  # type: ignore

        treecorr_config_dict = self.treecorr.toDict()

        # Swap rng_seed with an rng instance in treecorr config.
        rng = np.random.RandomState(treecorr_config_dict.pop("rng_seed"))
        treecorr_config_dict["rng"] = rng

        # Pass the appropriate arguments to the correlator and build a dict
        rhoStats: dict[str, treecorr.BinnedCorr2] = {}
        for rhoIndex in range(1, 6):
            _LOG.info("Calculating rho-%d", rhoIndex)
            rhoStats[f"rho{rhoIndex}"] = self._corrSpin2(
                ra,
                dec,
                *(args[rhoIndex]),
                treecorr_config_dict=treecorr_config_dict,
            )

        _LOG.info("Calculating rho3alt")
        rhoStats["rho3alt"] = self._corrSpin0(
            ra,
            dec,
            *(args[0]),
            treecorr_config_dict=treecorr_config_dict,
        )
        return cast(KeyedData, rhoStats)

    @classmethod
    def _corrSpin0(
        cls,
        ra: Vector,
        dec: Vector,
        k1: Vector,
        k2: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        treecorr_config_dict: Mapping[str, Any] | None = None,
    ) -> KKCorrelation:
        """Function to compute correlations between at most two scalar fields.

        This is used to compute rho3alt statistics, given the appropriate
        spin-0 (scalar) fields, usually fractional size residuals.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        k1 : `numpy.array`
            The primary scalar field.
        k2 : `numpy.array`, optional
            The secondary scalar field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        treecorr_config_dict : `dict`, optional
            Config dictionary to be passed to `treecorr`
            (`treecorr.KKCorrelation` or `treecorr.Catalog`).

        Returns
        -------
        xy : `treecorr.KKCorrelation`
            A `treecorr.KKCorrelation` object containing the correlation
            function.
        """
        nEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting KKCorrelation cannot exceed %d",
            nEntries,
            # n(n-1) is always even, so integer division is exact.
            nEntries * (nEntries - 1) // 2,
        )
        xy = treecorr.KKCorrelation(config=treecorr_config_dict)
        catA = treecorr.Catalog(
            config=treecorr_config_dict,
            ra=ra,
            dec=dec,
            k=k1,
            ra_units=raUnits,
            dec_units=decUnits,
            logger=_LOG,
        )
        if k2 is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            # Reuse the patch centers of the first catalog so that both
            # catalogs share the same patch definitions.
            catB = treecorr.Catalog(
                config=treecorr_config_dict,
                ra=ra,
                dec=dec,
                k=k2,
                ra_units=raUnits,
                dec_units=decUnits,
                logger=_LOG,
                patch_centers=catA.patch_centers,
            )
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over all axes, so the total is a scalar even when
        # npairs is multi-dimensional (expected for "TwoD" binning -- confirm
        # against the treecorr docs); the builtin sum would return an array
        # there and break the %d formatting.
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy

    @classmethod
    def _corrSpin2(
        cls,
        ra: Vector,
        dec: Vector,
        g1a: Vector,
        g2a: Vector,
        g1b: Vector | None = None,
        g2b: Vector | None = None,
        raUnits: str = "degrees",
        decUnits: str = "degrees",
        treecorr_config_dict: Mapping[str, Any] | None = None,
    ) -> GGCorrelation:
        """Function to compute correlations between shear-like fields.

        This is used to compute Rho statistics, given the appropriate spin-2
        (shear-like) fields.

        Parameters
        ----------
        ra : `numpy.array`
            The right ascension values of entries in the catalog.
        dec : `numpy.array`
            The declination values of entries in the catalog.
        g1a : `numpy.array`
            The first component of the primary shear-like field.
        g2a : `numpy.array`
            The second component of the primary shear-like field.
        g1b : `numpy.array`, optional
            The first component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        g2b : `numpy.array`, optional
            The second component of the secondary shear-like field.
            Autocorrelation of the primary field is computed if `None`.
        raUnits : `str`, optional
            Unit of the right ascension values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        decUnits : `str`, optional
            Unit of the declination values. Valid options are
            "degrees", "arcmin", "arcsec", "hours" or "radians".
        treecorr_config_dict : `dict`, optional
            Config dictionary to be passed to `treecorr`
            (`treecorr.GGCorrelation` or `treecorr.Catalog`).

        Returns
        -------
        xy : `treecorr.GGCorrelation`
            A `treecorr.GGCorrelation` object containing the correlation
            function.

        Notes
        -----
        If only one of ``g1b`` and ``g2b`` is provided, the secondary field
        is ignored and the auto-correlation of the primary field is computed.
        """
        nEntries = len(ra)
        _LOG.debug(
            "No. of entries: %d. The number of pairs in the resulting GGCorrelation cannot exceed %d",
            nEntries,
            # n(n-1) is always even, so integer division is exact.
            nEntries * (nEntries - 1) // 2,
        )
        xy = treecorr.GGCorrelation(config=treecorr_config_dict)
        catA = treecorr.Catalog(
            config=treecorr_config_dict,
            ra=ra,
            dec=dec,
            g1=g1a,
            g2=g2a,
            ra_units=raUnits,
            dec_units=decUnits,
            logger=_LOG,
        )
        if g1b is None or g2b is None:
            # Calculate the auto-correlation
            xy.process(catA)
        else:
            # Reuse the patch centers of the first catalog so that both
            # catalogs share the same patch definitions.
            catB = treecorr.Catalog(
                config=treecorr_config_dict,
                ra=ra,
                dec=dec,
                g1=g1b,
                g2=g2b,
                ra_units=raUnits,
                dec_units=decUnits,
                logger=_LOG,
                patch_centers=catA.patch_centers,
            )
            # Calculate the cross-correlation
            xy.process(catA, catB)

        # np.sum reduces over all axes, so the total is a scalar even when
        # npairs is multi-dimensional (expected for "TwoD" binning -- confirm
        # against the treecorr docs); the builtin sum would return an array
        # there and break the %d formatting.
        _LOG.debug("Correlated %d pairs based on the config set.", np.sum(xy.npairs))
        return xy