Coverage for python/lsst/analysis/tools/actions/vector/calcRhoStatistics.py: 39%

108 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-05 04:11 -0800

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "BinnedCorr2Config", 

26 "CalcRhoStatistics", 

27) 

28 

29import logging 

30from typing import TYPE_CHECKING, Any, Mapping, cast 

31 

32import numpy as np 

33import treecorr # type: ignore[import] 

34from lsst.pex.config import ChoiceField, Config, ConfigField, Field, FieldValidationError 

35 

36from ...interfaces import KeyedData, KeyedDataAction, Vector 

37from .calcShapeSize import CalcShapeSize 

38from .ellipticity import CalcE, CalcEDiff 

39from .vectorActions import FractionalDifference 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from treecorr import GGCorrelation, KKCorrelation 

43 

44 from ...interfaces import KeyedDataSchema 

45 

46_LOG = logging.getLogger(__name__) 

47 

48 

49class BinnedCorr2Config(Config): 

50 """A Config class that holds some of the parameters supported by treecorr. 

51 

52 The fields in this class correspond to the parameters that can be passed to 

53 BinnedCorr2 in `treecorr`, which is the base class for all two-point 

54 correlation function calculations. The default values set for the fields 

55 are identical to the default values set in v4.2 of `treecorr`. The 

56 parameters that are excluded in this class are 

57 'verbose', 'log_file', 'output_dots', 'rng' and 'pairwise' (deprecated). 

58 For details about these options, see the documentation for `treecorr`: 

59 https://rmjarvis.github.io/TreeCorr/_build/html/correlation2.html 

60 

61 A separate config class is used instead 

62 of constructing a `~lsst.pex.config.DictField` so that mixed types can be 

63 supported and the config can be validated at the beginning of the 

64 execution. The ``toDict`` provides a minimal dictionary that overrides only 

65 the default values and excludes the key-value pairs when the item is None. 

66 

67 Notes 

68 ----- 

69 This is intended to be used in CalcRhoStatistics class. It only supports 

70 some of the fields that are relevant for rho-statistics calculations. 

71 """ 

72 

73 nbins = Field[int]( 73 ↛ exitline 73 didn't jump to the function exit

74 doc=( 

75 "How many bins to use. " 

76 "(Exactly three of nbins, bin_size, min_sep, max_sep " 

77 "are required. If nbins is not given, it will be " 

78 "calculated from the values of the other three, " 

79 "rounding up to the next highest integer. " 

80 "In this case, bin_size will be readjusted to account " 

81 "for this rounding up." 

82 ), 

83 optional=True, 

84 check=lambda x: x > 0, 

85 ) 

86 

87 bin_size = Field[float]( 

88 doc=( 

89 "The width of the bins in log(separation). " 

90 "Exactly three of nbins, bin_size, min_sep, max_sep are required. " 

91 "If bin_size is not given, it will be calculated from the values " 

92 "of the other three." 

93 ), 

94 optional=True, 

95 ) 

96 

97 min_sep = Field[float]( 

98 doc=( 

99 "The minimum separation in units of sep_units, if relevant. " 

100 "Exactly three of nbins, bin_size, min_sep, max_sep are required. " 

101 "If min_sep is not given, it will be calculated from the values " 

102 "of the other three." 

103 ), 

104 optional=True, 

105 ) 

106 

107 max_sep = Field[float]( 

108 doc=( 

109 "The maximum separation in units of sep_units, if relevant. " 

110 "Exactly three of nbins, bin_size, min_sep, max_sep are required. " 

111 "If max_sep is not given, it will be calculated from the values " 

112 "of the other three." 

113 ), 

114 optional=True, 

115 ) 

116 

117 sep_units = ChoiceField[str]( 

118 doc=( 

119 "The units to use for the separation values, given as a string. " 

120 "This includes both min_sep and max_sep above, as well as the " 

121 "units of the output distance values." 

122 ), 

123 default="radian", 

124 optional=True, 

125 allowed={units: units for units in ["arcsec", "arcmin", "degree", "hour", "radian"]}, 

126 ) 

127 

128 bin_slop = Field[float]( 

129 doc=( 

130 "How much slop to allow in the placement of pairs in the bins. " 

131 "If bin_slop = 1, then the bin into which a particular pair is " 

132 "placed may be incorrect by at most 1.0 bin widths. " 

133 r"If None, use a bin_slop that gives a maximum error of 10% on " 

134 "any bin, which has been found to yield good results for most " 

135 "applications." 

136 ), 

137 default=None, 

138 optional=True, 

139 ) 

140 

141 precision = Field[int]( 141 ↛ exitline 141 didn't jump to the function exit

142 doc=("The precision to use for the output values. This specifies how many digits to write."), 

143 default=4, 

144 optional=True, 

145 check=lambda x: x > 0, 

146 ) 

147 

148 metric = ChoiceField[str]( 

149 doc=( 

150 "Which metric to use for distance measurements. For details, see " 

151 "https://rmjarvis.github.io/TreeCorr/_build/html/metric.html" 

152 ), 

153 default="Euclidean", 

154 optional=True, 

155 allowed={ 

156 "Euclidean": "straight-line Euclidean distance between two points", 

157 "FisherRperp": ( 

158 "the perpendicular component of the distance, " 

159 "following the definitions in " 

160 "Fisher et al, 1994 (MNRAS, 267, 927)" 

161 ), 

162 "OldRperp": ( 

163 "the perpendicular component of the distance using the " 

164 "definition of Rperp from TreeCorr v3.x." 

165 ), 

166 "Rlens": ( 

167 "Distance from the first object (taken to be a lens) to " 

168 "the line connecting Earth and the second object " 

169 "(taken to be a lensed source)." 

170 ), 

171 "Arc": "the true great circle distance for spherical coordinates.", 

172 "Periodic": "Like ``Euclidean``, but with periodic boundaries.", 

173 }, 

174 ) 

175 

176 bin_type = ChoiceField[str]( 

177 doc="What type of binning should be used?", 

178 default="Log", 

179 optional=True, 

180 allowed={ 

181 "Log": ( 

182 "Logarithmic binning in the distance. The bin steps will " 

183 "be uniform in log(r) from log(min_sep) .. log(max_sep)." 

184 ), 

185 "Linear": ( 

186 "Linear binning in the distance. The bin steps will be " 

187 "uniform in r from min_sep .. max_sep." 

188 ), 

189 "TwoD": ( 

190 "2-dimensional binning from x = (-max_sep .. max_sep) " 

191 "and y = (-max_sep .. max_sep). The bin steps will be " 

192 "uniform in both x and y. (i.e. linear in x,y)" 

193 ), 

194 }, 

195 ) 

196 

197 var_method = ChoiceField[str]( 

198 doc="Which method to use for estimating the variance", 

199 default="shot", 

200 optional=True, 

201 allowed={ 

202 method: method 

203 for method in [ 

204 "shot", 

205 "jackknife", 

206 "sample", 

207 "bootstrap", 

208 "marked_bootstrap", 

209 ] 

210 }, 

211 ) 

212 

213 num_bootstrap = Field[int]( 

214 doc=("How many bootstrap samples to use for the 'bootstrap' and 'marked_bootstrap' var methods."), 

215 default=500, 

216 optional=True, 

217 ) 

218 

219 def validate(self): 

220 # Docs inherited from base class 

221 super().validate() 

222 req_params = (self.nbins, self.bin_size, self.min_sep, self.max_sep) 

223 num_req_params = sum(param is not None for param in req_params) 

224 if num_req_params != 3: 

225 msg = ( 

226 "You must specify exactly three of ``nbins``, ``bin_size``, ``min_sep`` and ``max_sep``" 

227 f" in treecorr_config. {num_req_params} parameters were set instead." 

228 ) 

229 raise FieldValidationError(self.__class__.bin_size, self, msg) 

230 

231 if self.min_sep is not None and self.max_sep is not None: 

232 if self.min_sep > self.max_sep: 

233 raise FieldValidationError(self.__class__.min_sep, self, "min_sep must be <= max_sep") 

234 

235 def toDict(self): 

236 # Docs inherited from base class 

237 # We are excluding items that are None due to treecorr limitations. 

238 # TODO: DM-38480. This override can be removed after treecorr v4.3 

239 # makes its way onto rubin-env. 

240 return {k: v for k, v in super().toDict().items() if v is not None} 

241 

242 

243class CalcRhoStatistics(KeyedDataAction): 

244 r"""Calculate rho statistics 

245 

246 Rho statistics refer to a collection of correlation functions involving 

247 PSF ellipticity and size residuals. They quantify the contribution from PSF 

248 leakage due to errors in PSF modeling to the weak lensing shear correlation 

249 functions. The standard rho statistics are indexed from 1 to 5, and 

250 this action calculates a sixth rho statistic, indexed 0. 

251 

252 Notes 

253 ----- 

254 The exact definitions of rho statistics as defined in [1]_ are given below. 

255 In addition to these five, we also compute the auto-correlation function of 

256 the fractional size residuals and call it as the :math:`\rho_0( \theta )`. 

257 

258 .. math:: 

259 

260 \rho_1(\theta) &= \langle \delta e^*_{PSF}(x) \delta e_{PSF}(x+\theta) \rangle # noqa: W505 

261 

262 \rho_2(\theta) &= \langle e^*_{PSF}(x) \delta e_{PSF}(x+\theta) \rangle 

263 

264 \rho_3(\theta) &= \left\langle (e^*_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x)) 

265 \delta e_{PSF}(x+\theta) \right\rangle 

266 

267 \rho_4(\theta) &= \left\langle (\delta e^*_{PSF}(x) 

268 (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)) \right\rangle 

269 

270 \rho_5(\theta) &= \left\langle (e^*_{PSF}(x) 

271 (e_{PSF}\frac{\delta T_{PSF}}{T_{PSF}}(x+\theta)) \right\rangle 

272 

273 There is a slightly different version for :math:`\rho_3( \theta )`, used in Melchior et al. (2015) [2]_. 

274 

275 .. math:: 

276 

277 \rho'_3(\theta) &= \left\langle\frac{\delta T_{PSF}}{T_{PSF}}(x) 

278 \frac{\delta T_{PSF}}{T_{PSF}}(x+\theta) 

279 \right\rangle 

280 

281 

282 The definition of ellipticity used in [1]_ correspond to ``epsilon``-type ellipticity, which is typically 

283 smaller by a factor of 4 than using ``chi``-type ellipticity. 

284 

285 References 

286 ---------- 

287 .. [1] Jarvis, M., Sheldon, E., Zuntz, J., Kacprzak, T., Bridle, S. L., et. al (2016). # noqa: W501 

288 The DES Science Verification weak lensing shear catalogues 

289 MNRAS, 460, 2245–2281. 

290 https://doi.org/10.1093/mnras/stw990; 

291 https://arxiv.org/abs/1507.05603 

292 .. [2] Melchior, P., et. al (2015) 

293 Mass and galaxy distributions of four massive galaxy clusters from Dark Energy Survey 

294 Science Verification data 

295 MNRAS, 449, no. 3, pp. 2219–2238. 

296 https://doi:10.1093/mnras/stv398 

297 https://arxiv.org/abs/1405.4285 

298 """ 

299 

300 colRa = Field[str](doc="RA column", default="coord_ra") 

301 

302 colDec = Field[str](doc="Dec column", default="coord_dec") 

303 

304 colXx = Field[str](doc="The column name to get the xx shape component from.", default="{band}_ixx") 

305 

306 colYy = Field[str](doc="The column name to get the yy shape component from.", default="{band}_iyy") 

307 

308 colXy = Field[str](doc="The column name to get the xy shape component from.", default="{band}_ixy") 

309 

310 colPsfXx = Field[str]( 

311 doc="The column name to get the PSF xx shape component from.", default="{band}_ixxPSF" 

312 ) 

313 

314 colPsfYy = Field[str]( 

315 doc="The column name to get the PSF yy shape component from.", default="{band}_iyyPSF" 

316 ) 

317 

318 colPsfXy = Field[str]( 

319 doc="The column name to get the PSF xy shape component from.", default="{band}_ixyPSF" 

320 ) 

321 

322 # TODO: Replace them with distortion and shear (DM-37325). 

323 ellipticityType = ChoiceField[str]( 

324 doc="The type of ellipticity to calculate", 

325 allowed={ 

326 "chi": "Distortion, measured as (Ixx - Iyy)/(Ixx + Iyy)", 

327 "epsilon": ("Shear, measured as (Ixx - Iyy)/(Ixx + Iyy + 2*sqrt(Ixx*Iyy - Ixy**2))"), 

328 }, 

329 default="chi", 

330 ) 

331 

332 sizeType = ChoiceField[str]( 

333 doc="The type of size to calculate", 

334 default="trace", 

335 allowed={ 

336 "trace": "trace radius", 

337 "determinant": "determinant radius", 

338 }, 

339 ) 

340 

341 treecorr = ConfigField[BinnedCorr2Config]( 

342 doc="TreeCorr configuration", 

343 ) 

344 

345 def setDefaults(self): 

346 super().setDefaults() 

347 self.treecorr = BinnedCorr2Config() 

348 self.treecorr.sep_units = "arcmin" 

349 self.treecorr.max_sep = 100.0 

350 

351 def getInputSchema(self) -> KeyedDataSchema: 

352 return ( 

353 (self.colRa, Vector), 

354 (self.colDec, Vector), 

355 (self.colXx, Vector), 

356 (self.colYy, Vector), 

357 (self.colXy, Vector), 

358 (self.colPsfXx, Vector), 

359 (self.colPsfYy, Vector), 

360 (self.colPsfXy, Vector), 

361 ) 

362 

363 def __call__(self, data: KeyedData, **kwargs) -> KeyedData: 

364 calcEMeas = CalcE( 

365 colXx=self.colXx, 

366 colYy=self.colYy, 

367 colXy=self.colXy, 

368 ellipticityType=self.ellipticityType, 

369 ) 

370 calcEpsf = CalcE( 

371 colXx=self.colPsfXx, 

372 colYy=self.colPsfYy, 

373 colXy=self.colPsfXy, 

374 ellipticityType=self.ellipticityType, 

375 ) 

376 

377 calcEDiff = CalcEDiff(colA=calcEMeas, colB=calcEpsf) 

378 

379 calcSizeResidual = FractionalDifference( 

380 actionA=CalcShapeSize( 

381 colXx=self.colXx, 

382 colYy=self.colYy, 

383 colXy=self.colXy, 

384 sizeType=self.sizeType, 

385 ), 

386 actionB=CalcShapeSize( 

387 colXx=self.colPsfXx, 

388 colYy=self.colPsfYy, 

389 colXy=self.colPsfXy, 

390 sizeType=self.sizeType, 

391 ), 

392 ) 

393 

394 # chi-type ellipticity has a shear response of 2, so we need to 

395 # divide by 2 so that the rho-stats do not depend on the 

396 # ellipticity-type. 

397 # Note: For distortion, the responsitivity is 2(1 - e^2_{rms}), 

398 # where e_rms is the root mean square ellipticity per component. 

399 # This is expected to be small and we ignore it here. 

400 # This definition of responsitivity is consistent with the definions 

401 # used in the rho-statistics calculations for the HSC shear catalog 

402 # papers (Mandelbaum et al. 2018, Li et al., 2022). 

403 responsitivity = 2.0 if self.ellipticityType == "chi" else 1.0 

404 

405 # Call the actions on the data. 

406 eMEAS = calcEMeas(data, **kwargs) 

407 if self.ellipticityType == "chi": 

408 _LOG.debug("Correction value of responsitivity would be %f", 2 - np.mean(np.abs(eMEAS) ** 2)) 

409 eMEAS /= responsitivity # type: ignore 

410 e1, e2 = np.real(eMEAS), np.imag(eMEAS) 

411 eRes = calcEDiff(data, **kwargs) 

412 eRes /= responsitivity # type: ignore 

413 e1Res, e2Res = np.real(eRes), np.imag(eRes) 

414 sizeRes = calcSizeResidual(data, **kwargs) 

415 

416 # Scale the sizeRes by ellipticities 

417 e1SizeRes = e1 * sizeRes 

418 e2SizeRes = e2 * sizeRes 

419 

420 # Package the arguments to capture auto-/cross-correlations for the 

421 # Rho statistics. 

422 args = { 

423 0: (sizeRes, None), 

424 1: (e1Res, e2Res, None, None), 

425 2: (e1, e2, e1Res, e2Res), 

426 3: (e1SizeRes, e2SizeRes, None, None), 

427 4: (e1Res, e2Res, e1SizeRes, e2SizeRes), 

428 5: (e1, e2, e1SizeRes, e2SizeRes), 

429 } 

430 

431 ra: Vector = data[self.colRa] # type: ignore 

432 dec: Vector = data[self.colDec] # type: ignore 

433 

434 treecorrKwargs = self.treecorr.toDict() 

435 

436 # Pass the appropriate arguments to the correlator and build a dict 

437 rhoStats: Mapping[str, treecorr.BinnedCorr2] = {} 

438 for rhoIndex in range(1, 6): 

439 _LOG.info("Calculating rho-%d", rhoIndex) 

440 rhoStats[f"rho{rhoIndex}"] = self._corrSpin2( # type: ignore[index] 

441 ra, dec, *(args[rhoIndex]), **treecorrKwargs 

442 ) 

443 

444 _LOG.info("Calculating rho3alt") 

445 rhoStats["rho3alt"] = self._corrSpin0(ra, dec, *(args[0]), **treecorrKwargs) # type: ignore[index] 

446 return cast(KeyedData, rhoStats) 

447 

448 @classmethod 

449 def _corrSpin0( 

450 cls, 

451 ra: Vector, 

452 dec: Vector, 

453 k1: Vector, 

454 k2: Vector | None = None, 

455 raUnits: str = "degrees", 

456 decUnits: str = "degrees", 

457 **treecorrKwargs: Any, 

458 ) -> KKCorrelation: 

459 """Function to compute correlations between at most two scalar fields. 

460 

461 This is used to compute rho3alt statistics, given the appropriate 

462 spin-0 (scalar) fields, usually fractional size residuals. 

463 

464 Parameters 

465 ---------- 

466 ra : `numpy.array` 

467 The right ascension values of entries in the catalog. 

468 dec : `numpy.array` 

469 The declination values of entries in the catalog. 

470 k1 : `numpy.array` 

471 The primary scalar field. 

472 k2 : `numpy.array`, optional 

473 The secondary scalar field. 

474 Autocorrelation of the primary field is computed if `None`. 

475 raUnits : `str`, optional 

476 Unit of the right ascension values. Valid options are 

477 "degrees", "arcmin", "arcsec", "hours" or "radians". 

478 decUnits : `str`, optional 

479 Unit of the declination values. Valid options are 

480 "degrees", "arcmin", "arcsec", "hours" or "radians". 

481 **treecorrKwargs 

482 Keyword arguments to be passed to `treecorr.KKCorrelation`. 

483 

484 Returns 

485 ------- 

486 xy : `treecorr.KKCorrelation` 

487 A `treecorr.KKCorrelation` object containing the correlation 

488 function. 

489 """ 

490 _LOG.debug( 

491 "No. of entries: %d. The number of pairs in the resulting KKCorrelation cannot exceed %d", 

492 len(ra), 

493 len(ra) * (len(ra) - 1) / 2, 

494 ) 

495 xy = treecorr.KKCorrelation(**treecorrKwargs) 

496 catA = treecorr.Catalog(ra=ra, dec=dec, k=k1, ra_units=raUnits, dec_units=decUnits, logger=_LOG) 

497 if k2 is None: 

498 # Calculate the auto-correlation 

499 xy.process(catA) 

500 else: 

501 catB = treecorr.Catalog(ra=ra, dec=dec, k=k2, ra_units=raUnits, dec_units=decUnits, logger=_LOG) 

502 # Calculate the cross-correlation 

503 xy.process(catA, catB) 

504 

505 _LOG.debug("Correlated %d pairs based on the config set.", sum(xy.npairs)) 

506 return xy 

507 

508 @classmethod 

509 def _corrSpin2( 

510 cls, 

511 ra: Vector, 

512 dec: Vector, 

513 g1a: Vector, 

514 g2a: Vector, 

515 g1b: Vector | None = None, 

516 g2b: Vector | None = None, 

517 raUnits: str = "degrees", 

518 decUnits: str = "degrees", 

519 **treecorrKwargs: Any, 

520 ) -> GGCorrelation: 

521 """Function to compute correlations between shear-like fields. 

522 

523 This is used to compute Rho statistics, given the appropriate spin-2 

524 (shear-like) fields. 

525 

526 Parameters 

527 ---------- 

528 ra : `numpy.array` 

529 The right ascension values of entries in the catalog. 

530 dec : `numpy.array` 

531 The declination values of entries in the catalog. 

532 g1a : `numpy.array` 

533 The first component of the primary shear-like field. 

534 g2a : `numpy.array` 

535 The second component of the primary shear-like field. 

536 g1b : `numpy.array`, optional 

537 The first component of the secondary shear-like field. 

538 Autocorrelation of the primary field is computed if `None`. 

539 g2b : `numpy.array`, optional 

540 The second component of the secondary shear-like field. 

541 Autocorrelation of the primary field is computed if `None`. 

542 raUnits : `str`, optional 

543 Unit of the right ascension values. Valid options are 

544 "degrees", "arcmin", "arcsec", "hours" or "radians". 

545 decUnits : `str`, optional 

546 Unit of the declination values. Valid options are 

547 "degrees", "arcmin", "arcsec", "hours" or "radians". 

548 **treecorrKwargs 

549 Keyword arguments to be passed to `treecorr.GGCorrelation`. 

550 

551 Returns 

552 ------- 

553 xy : `treecorr.GGCorrelation` 

554 A `treecorr.GGCorrelation` object containing the correlation 

555 function. 

556 """ 

557 _LOG.debug( 

558 "No. of entries: %d. The number of pairs in the resulting GGCorrelation cannot exceed %d", 

559 len(ra), 

560 len(ra) * (len(ra) - 1) / 2, 

561 ) 

562 xy = treecorr.GGCorrelation(**treecorrKwargs) 

563 catA = treecorr.Catalog( 

564 ra=ra, dec=dec, g1=g1a, g2=g2a, ra_units=raUnits, dec_units=decUnits, logger=_LOG 

565 ) 

566 if g1b is None or g2b is None: 

567 # Calculate the auto-correlation 

568 xy.process(catA) 

569 else: 

570 catB = treecorr.Catalog( 

571 ra=ra, dec=dec, g1=g1b, g2=g2b, ra_units=raUnits, dec_units=decUnits, logger=_LOG 

572 ) 

573 # Calculate the cross-correlation 

574 xy.process(catA, catB) 

575 

576 _LOG.debug("Correlated %d pairs based on the config set.", sum(xy.npairs)) 

577 return xy