# This file is part of cp_pipe.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

__all__ = ['ddict2dict', 'CovFastFourierTransform']


import galsim
import logging
import numpy as np
import numpy.polynomial.polynomial as poly

from scipy.optimize import leastsq
from scipy.stats import median_abs_deviation, norm

from lsst.ip.isr import isrMock
import lsst.afw.image
import lsst.afw.math


def sigmaClipCorrection(nSigClip):
    """Correct measured sigma to account for clipping.

    If we clip our input data and then measure sigma, then the
    measured sigma is smaller than the true value because real
    points beyond the clip threshold have been removed. This is a
    small (1.5% at nSigClip=3) effect when nSigClip >~ 3, but the
    default parameters for measuring crosstalk use nSigClip=2.0.
    This causes the measured sigma to be about 15% smaller than
    real. This formula corrects the issue, for the symmetric case
    (upper clip threshold equal to lower clip threshold).

    Parameters
    ----------
    nSigClip : `float`
        Number of sigma the measurement was clipped by.

    Returns
    -------
    scaleFactor : `float`
        Scale factor to increase the measured sigma by.
    """
    varFactor = 1.0 - (2 * nSigClip * norm.pdf(nSigClip)) / (norm.cdf(nSigClip) - norm.cdf(-nSigClip))
    return 1.0 / np.sqrt(varFactor)

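# Example (illustrative; the numbers below are made up for demonstration):
# a sigma measured after 2-sigma clipping underestimates the true value,
# so scale it back up.
#
#     measuredSigma = 4.4
#     trueSigmaEstimate = measuredSigma * sigmaClipCorrection(2.0)
#     # trueSigmaEstimate is approximately 5.0
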

def calculateWeightedReducedChi2(measured, model, weightsMeasured, nData, nParsModel):
    """Calculate weighted reduced chi2.

    Parameters
    ----------
    measured : `list`
        List with measured data.
    model : `list`
        List with modeled data.
    weightsMeasured : `list`
        List with weights for the measured data.
    nData : `int`
        Number of data points.
    nParsModel : `int`
        Number of parameters in the model.

    Returns
    -------
    redWeightedChi2 : `float`
        Reduced weighted chi2.
    """
    wRes = (measured - model)*weightsMeasured
    return ((wRes*wRes).sum())/(nData-nParsModel)

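# Example (illustrative; synthetic values): with weights taken as 1/sigma,
# residuals of (0.1, -0.1, 0.2) and sigma = 0.1 give chi2 = 6 over
# nData - nParsModel = 2 degrees of freedom.
#
#     measured = np.array([1.1, 1.9, 3.2])
#     model = np.array([1.0, 2.0, 3.0])
#     weights = np.array([10.0, 10.0, 10.0])
#     calculateWeightedReducedChi2(measured, model, weights, 3, 1)
#     # 3.0
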

def makeMockFlats(expTime, gain=1.0, readNoiseElectrons=5, fluxElectrons=1000,
                  randomSeedFlat1=1984, randomSeedFlat2=666, powerLawBfParams=[],
                  expId1=0, expId2=1):
    """Create a pair of mock flats with isrMock.

    Parameters
    ----------
    expTime : `float`
        Exposure time of the flats.
    gain : `float`, optional
        Gain, in e/ADU.
    readNoiseElectrons : `float`, optional
        Read noise rms, in electrons.
    fluxElectrons : `float`, optional
        Flux of flats, in electrons per second.
    randomSeedFlat1 : `int`, optional
        Random seed for the normal distributions for the mean signal
        and noise (flat1).
    randomSeedFlat2 : `int`, optional
        Random seed for the normal distributions for the mean signal
        and noise (flat2).
    powerLawBfParams : `list`, optional
        Parameters for `galsim.cdmodel.PowerLawCD` to simulate the
        brighter-fatter effect.
    expId1 : `int`, optional
        Exposure ID for first flat.
    expId2 : `int`, optional
        Exposure ID for second flat.

    Returns
    -------
    flatExp1 : `lsst.afw.image.exposure.ExposureF`
        First exposure of flat field pair.
    flatExp2 : `lsst.afw.image.exposure.ExposureF`
        Second exposure of flat field pair.

    Notes
    -----
    The parameters of `galsim.cdmodel.PowerLawCD` are `n, r0, t0, rx,
    tx, r, t, alpha`. For more information about their meaning, see
    the Galsim documentation
    https://galsim-developers.github.io/GalSim/_build/html/_modules/galsim/cdmodel.html # noqa: W505
    and Gruen+15 (1501.02802).

    Example: galsim.cdmodel.PowerLawCD(8, 1.1e-7, 1.1e-7, 1.0e-8,
    1.0e-8, 1.0e-9, 1.0e-9, 2.0)
    """
    flatFlux = fluxElectrons  # e/s
    flatMean = flatFlux*expTime  # e
    readNoise = readNoiseElectrons  # e

    mockImageConfig = isrMock.IsrMock.ConfigClass()

    mockImageConfig.flatDrop = 0.99999
    mockImageConfig.isTrimmed = True

    flatExp1 = isrMock.FlatMock(config=mockImageConfig).run()
    flatExp2 = flatExp1.clone()
    (shapeY, shapeX) = flatExp1.getDimensions()
    flatWidth = np.sqrt(flatMean)

    rng1 = np.random.RandomState(randomSeedFlat1)
    flatData1 = rng1.normal(flatMean, flatWidth, (shapeX, shapeY)) + rng1.normal(0.0, readNoise,
                                                                                (shapeX, shapeY))
    rng2 = np.random.RandomState(randomSeedFlat2)
    flatData2 = rng2.normal(flatMean, flatWidth, (shapeX, shapeY)) + rng2.normal(0.0, readNoise,
                                                                                (shapeX, shapeY))
    # Simulate BF with power law model in galsim.
    if len(powerLawBfParams):
        if not len(powerLawBfParams) == 8:
            raise RuntimeError("Wrong number of parameters for `galsim.cdmodel.PowerLawCD`. "
                               f"Expected 8; passed {len(powerLawBfParams)}.")
        cd = galsim.cdmodel.PowerLawCD(*powerLawBfParams)
        tempFlatData1 = galsim.Image(flatData1)
        temp2FlatData1 = cd.applyForward(tempFlatData1)

        tempFlatData2 = galsim.Image(flatData2)
        temp2FlatData2 = cd.applyForward(tempFlatData2)

        flatExp1.image.array[:] = temp2FlatData1.array/gain  # ADU
        flatExp2.image.array[:] = temp2FlatData2.array/gain  # ADU
    else:
        flatExp1.image.array[:] = flatData1/gain  # ADU
        flatExp2.image.array[:] = flatData2/gain  # ADU

    visitInfoExp1 = lsst.afw.image.VisitInfo(exposureTime=expTime)
    visitInfoExp2 = lsst.afw.image.VisitInfo(exposureTime=expTime)

    flatExp1.info.id = expId1
    flatExp1.getInfo().setVisitInfo(visitInfoExp1)
    flatExp2.info.id = expId2
    flatExp2.getInfo().setVisitInfo(visitInfoExp2)

    return flatExp1, flatExp2

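# Example (illustrative sketch; requires the LSST stack and galsim): make a
# pair of mock flats with a 1.5 e/ADU gain and the default flux and noise.
#
#     flatExp1, flatExp2 = makeMockFlats(expTime=1.0, gain=1.5)
#     # Two ExposureF objects, in ADU, sharing the same exposure time.
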

def irlsFit(initialParams, dataX, dataY, function, weightsY=None, weightType='Cauchy', scaleResidual=True):
    """Iteratively reweighted least squares fit.

    This uses `lsst.cp.pipe.utils.fitLeastSq`, but applies weights
    based on the Cauchy distribution by default. Other weight options
    are implemented. See e.g. Holland and Welsch, 1977,
    doi:10.1080/03610927708827533.

    Parameters
    ----------
    initialParams : `list` [`float`]
        Starting parameters.
    dataX : `numpy.array`, (N,)
        Abscissa data.
    dataY : `numpy.array`, (N,)
        Ordinate data.
    function : callable
        Function to fit.
    weightsY : `numpy.array`, (N,), optional
        Weights to apply to the data.
    weightType : `str`, optional
        Type of weighting to use. One of Cauchy, Anderson, bisquare,
        box, Welsch, Huber, logistic, or Fair.
    scaleResidual : `bool`, optional
        If True, the residual is scaled by the sqrt of the Y values.

    Returns
    -------
    polyFit : `list` [`float`]
        Final best fit parameters.
    polyFitErr : `list` [`float`]
        Final errors on fit parameters.
    chiSq : `float`
        Reduced chi squared.
    weightsY : `list` [`float`]
        Final weights used for each point.

    Raises
    ------
    RuntimeError
        Raised if an unknown weightType string is passed.
    """
    if weightsY is None:
        weightsY = np.ones_like(dataX)

    polyFit, polyFitErr, chiSq = fitLeastSq(initialParams, dataX, dataY, function, weightsY=weightsY)
    for iteration in range(10):
        resid = np.abs(dataY - function(polyFit, dataX))
        if scaleResidual:
            resid = resid / np.sqrt(dataY)
        if weightType == 'Cauchy':
            # Use Cauchy weighting. This is a soft weight.
            # At [2, 3, 5, 10] sigma, weights are [.59, .39, .19, .05].
            Z = resid / 2.385
            weightsY = 1.0 / (1.0 + np.square(Z))
        elif weightType == 'Anderson':
            # Anderson+1972 weighting. This is a hard weight.
            # At [2, 3, 5, 10] sigma, weights are [.67, .35, 0.0, 0.0].
            Z = resid / (1.339 * np.pi)
            weightsY = np.where(Z < 1.0, np.sinc(Z), 0.0)
        elif weightType == 'bisquare':
            # Beaton and Tukey (1974) biweight. This is a hard weight.
            # At [2, 3, 5, 10] sigma, weights are [.81, .59, 0.0, 0.0].
            Z = resid / 4.685
            weightsY = np.where(Z < 1.0, 1.0 - np.square(Z), 0.0)
        elif weightType == 'box':
            # Hinich and Talwar (1975). This is a hard weight.
            # At [2, 3, 5, 10] sigma, weights are [1.0, 0.0, 0.0, 0.0].
            weightsY = np.where(resid < 2.795, 1.0, 0.0)
        elif weightType == 'Welsch':
            # Dennis and Welsch (1976). This is a hard weight.
            # At [2, 3, 5, 10] sigma, weights are [.64, .36, .06, 1e-5].
            Z = resid / 2.985
            weightsY = np.exp(-1.0 * np.square(Z))
        elif weightType == 'Huber':
            # Huber (1964) weighting. This is a soft weight.
            # At [2, 3, 5, 10] sigma, weights are [.67, .45, .27, .13].
            Z = resid / 1.345
            weightsY = np.where(Z < 1.0, 1.0, 1 / Z)
        elif weightType == 'logistic':
            # Logistic weighting. This is a soft weight.
            # At [2, 3, 5, 10] sigma, weights are [.56, .40, .24, .12].
            Z = resid / 1.205
            weightsY = np.tanh(Z) / Z
        elif weightType == 'Fair':
            # Fair (1974) weighting. This is a soft weight.
            # At [2, 3, 5, 10] sigma, weights are [.41, .32, .22, .12].
            Z = resid / 1.4
            weightsY = 1.0 / (1.0 + Z)
        else:
            raise RuntimeError(f"Unknown weighting type: {weightType}")
        polyFit, polyFitErr, chiSq = fitLeastSq(initialParams, dataX, dataY, function, weightsY=weightsY)

    return polyFit, polyFitErr, chiSq, weightsY

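# Example (illustrative; synthetic data): fit a line in the presence of a
# single outlier, which the default Cauchy weighting should down-weight.
#
#     def line(pars, x):
#         return pars[0] + pars[1]*x
#
#     x = np.linspace(1.0, 10.0, 20)
#     y = 2.0 + 3.0*x
#     y[5] += 50.0  # outlier
#     pars, parErrs, redChi2, weights = irlsFit([1.0, 1.0], x, y, line)
#     # pars is approximately [2.0, 3.0]; weights[5] is much less than 1.
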

def fitLeastSq(initialParams, dataX, dataY, function, weightsY=None):
    """Do a fit and estimate the parameter errors using
    scipy.optimize.leastsq.

    optimize.leastsq returns the fractional covariance matrix. To
    estimate the standard deviation of the fit parameters, multiply
    the entries of this matrix by the unweighted reduced chi squared
    and take the square root of the diagonal elements.

    Parameters
    ----------
    initialParams : `list` [`float`]
        Initial values for fit parameters. For ptcFitType=POLYNOMIAL,
        its length determines the degree of the polynomial.
    dataX : `numpy.array`, (N,)
        Data in the abscissa axis.
    dataY : `numpy.array`, (N,)
        Data in the ordinate axis.
    function : callable object (function)
        Function to fit the data with.
    weightsY : `numpy.array`, (N,), optional
        Weights of the data in the ordinate axis.

    Returns
    -------
    pFitSingleLeastSquares : `list` [`float`]
        List with fitted parameters.
    pErrSingleLeastSquares : `list` [`float`]
        List with errors for fitted parameters.
    reducedChiSqSingleLeastSquares : `float`
        Reduced chi squared, unweighted if weightsY is not provided.
    """
    if weightsY is None:
        weightsY = np.ones(len(dataX))

    def errFunc(p, x, y, weightsY=None):
        if weightsY is None:
            weightsY = np.ones(len(x))
        return (function(p, x) - y)*weightsY

    pFit, pCov, infoDict, errMessage, success = leastsq(errFunc, initialParams,
                                                        args=(dataX, dataY, weightsY), full_output=1,
                                                        epsfcn=0.0001)

    if (len(dataY) > len(initialParams)) and pCov is not None:
        reducedChiSq = calculateWeightedReducedChi2(dataY, function(pFit, dataX), weightsY, len(dataY),
                                                    len(initialParams))
        pCov *= reducedChiSq
    else:
        pCov = np.zeros((len(initialParams), len(initialParams)))
        pCov[:, :] = np.nan
        reducedChiSq = np.nan

    errorVec = []
    for i in range(len(pFit)):
        errorVec.append(np.fabs(pCov[i][i])**0.5)

    pFitSingleLeastSquares = pFit
    pErrSingleLeastSquares = np.array(errorVec)

    return pFitSingleLeastSquares, pErrSingleLeastSquares, reducedChiSq

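# Example (illustrative; synthetic data): recover the coefficients of a
# noisy quadratic using funcPolynomial (defined later in this module).
#
#     rng = np.random.RandomState(12345)
#     x = np.linspace(0.0, 10.0, 50)
#     y = 1.0 + 2.0*x + 0.5*x*x + rng.normal(0.0, 0.1, 50)
#     pars, parErrs, redChi2 = fitLeastSq([1.0, 1.0, 1.0], x, y, funcPolynomial)
#     # pars is approximately [1.0, 2.0, 0.5]
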

def fitBootstrap(initialParams, dataX, dataY, function, weightsY=None, confidenceSigma=1.):
    """Do a fit using least squares and bootstrap to estimate parameter errors.

    The bootstrap error bars are calculated by fitting 100 random data sets.

    Parameters
    ----------
    initialParams : `list` [`float`]
        Initial values for fit parameters. For ptcFitType=POLYNOMIAL,
        its length determines the degree of the polynomial.
    dataX : `numpy.array`, (N,)
        Data in the abscissa axis.
    dataY : `numpy.array`, (N,)
        Data in the ordinate axis.
    function : callable object (function)
        Function to fit the data with.
    weightsY : `numpy.array`, (N,), optional
        Weights of the data in the ordinate axis.
    confidenceSigma : `float`, optional
        Number of sigmas that determine confidence interval for the
        bootstrap errors.

    Returns
    -------
    pFitBootstrap : `list` [`float`]
        List with fitted parameters.
    pErrBootstrap : `list` [`float`]
        List with errors for fitted parameters.
    reducedChiSqBootstrap : `float`
        Reduced chi squared, unweighted if weightsY is not provided.
    """
    if weightsY is None:
        weightsY = np.ones(len(dataX))

    def errFunc(p, x, y, weightsY):
        if weightsY is None:
            weightsY = np.ones(len(x))
        return (function(p, x) - y)*weightsY

    # Fit first time
    pFit, _ = leastsq(errFunc, initialParams, args=(dataX, dataY, weightsY), full_output=0)

    # Get the stdev of the residuals
    residuals = errFunc(pFit, dataX, dataY, weightsY)
    # 100 random data sets are generated and fitted
    pars = []
    for i in range(100):
        randomDelta = np.random.normal(0., np.fabs(residuals), len(dataY))
        randomDataY = dataY + randomDelta
        randomFit, _ = leastsq(errFunc, initialParams,
                               args=(dataX, randomDataY, weightsY), full_output=0)
        pars.append(randomFit)
    pars = np.array(pars)
    meanPfit = np.mean(pars, 0)

    # Confidence interval for parameter estimates
    errPfit = confidenceSigma*np.std(pars, 0)
    pFitBootstrap = meanPfit
    pErrBootstrap = errPfit

    reducedChiSq = calculateWeightedReducedChi2(dataY, function(pFitBootstrap, dataX), weightsY, len(dataY),
                                                len(initialParams))
    return pFitBootstrap, pErrBootstrap, reducedChiSq

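# Example (illustrative; synthetic data): same kind of fit as above, but
# with bootstrap error bars instead of covariance-matrix errors.
#
#     rng = np.random.RandomState(42)
#     x = np.linspace(0.0, 10.0, 50)
#     y = 1.0 + 2.0*x + rng.normal(0.0, 0.1, 50)
#     pars, parErrs, redChi2 = fitBootstrap([1.0, 1.0], x, y, funcPolynomial)
#     # parErrs holds the 1-sigma spread of the 100 bootstrap fits.
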

def funcPolynomial(pars, x):
    """Polynomial function definition.

    Parameters
    ----------
    pars : `list`
        Polynomial coefficients. Its length determines the polynomial order.
    x : `numpy.array`, (N,)
        Abscissa array.

    Returns
    -------
    y : `numpy.array`, (N,)
        Ordinate array after evaluating polynomial of order
        len(pars)-1 at `x`.
    """
    return poly.polyval(x, [*pars])

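# Example (illustrative): coefficients are in increasing order of power,
# so [1, 2, 3] evaluates 1 + 2*x + 3*x**2.
#
#     funcPolynomial([1.0, 2.0, 3.0], np.array([0.0, 1.0]))
#     # array([1., 6.])
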

def funcAstier(pars, x):
    """Single brighter-fatter parameter model for PTC; Equation 16 of
    Astier+19.

    Parameters
    ----------
    pars : `list`
        Parameters of the model: a00 (brighter-fatter), gain (e/ADU),
        and noise (e^2).
    x : `numpy.array`, (N,)
        Signal mu (ADU).

    Returns
    -------
    y : `numpy.array`, (N,)
        C_00 (variance) in ADU^2.
    """
    a00, gain, noise = pars
    return 0.5/(a00*gain*gain)*(np.exp(2*a00*x*gain)-1) + noise/(gain*gain)  # C_00

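# Example (illustrative; synthetic parameters): for small a00*gain*mu the
# model reduces to the linear PTC, var = mu/gain + noise/gain**2.
#
#     pars = [1.0e-6, 1.5, 25.0]  # a00, gain (e/ADU), noise (e^2)
#     mu = np.array([100.0, 1000.0, 10000.0])  # ADU
#     var = funcAstier(pars, mu)
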

def arrangeFlatsByExpTime(exposureList, exposureIdList, log=None):
    """Arrange exposures by exposure time.

    Parameters
    ----------
    exposureList : `list` [`lsst.pipe.base.connections.DeferredDatasetRef`]
        Input list of exposure references.
    exposureIdList : `list` [`int`]
        List of exposure ids as obtained by dataId[`exposure`].
    log : `lsst.utils.logging.LsstLogAdapter`, optional
        Log object.

    Returns
    -------
    flatsAtExpTime : `dict` [`float`,
            `list`[(`lsst.pipe.base.connections.DeferredDatasetRef`,
            `int`)]]
        Dictionary that groups references to flat-field exposures
        (and their IDs) that have the same exposure time (seconds).
    """
    flatsAtExpTime = {}
    assert len(exposureList) == len(exposureIdList), "Different lengths for exp. list and exp. ID lists"
    for expRef, expId in zip(exposureList, exposureIdList):
        expTime = expRef.get(component='visitInfo').exposureTime
        if not np.isfinite(expTime) and log is not None:
            log.warning("Exposure %d has non-finite exposure time.", expId)
        listAtExpTime = flatsAtExpTime.setdefault(expTime, [])
        listAtExpTime.append((expRef, expId))

    return flatsAtExpTime

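# Example (illustrative sketch): real inputs are butler DeferredDatasetRef
# objects; the `_FakeRef` stub below is hypothetical and only mimics the
# `.get(component='visitInfo')` interface this function relies on.
#
#     class _FakeRef:
#         def __init__(self, expTime):
#             self._visitInfo = lsst.afw.image.VisitInfo(exposureTime=expTime)
#
#         def get(self, component=None):
#             return self._visitInfo
#
#     refs = [_FakeRef(5.0), _FakeRef(5.0), _FakeRef(10.0)]
#     groups = arrangeFlatsByExpTime(refs, [101, 102, 103])
#     # {5.0: [(refs[0], 101), (refs[1], 102)], 10.0: [(refs[2], 103)]}
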

def arrangeFlatsByExpFlux(exposureList, exposureIdList, fluxKeyword, log=None):
    """Arrange exposures by exposure flux.

    Parameters
    ----------
    exposureList : `list` [`lsst.pipe.base.connections.DeferredDatasetRef`]
        Input list of exposure references.
    exposureIdList : `list` [`int`]
        List of exposure ids as obtained by dataId[`exposure`].
    fluxKeyword : `str`
        Header keyword that contains the flux per exposure.
    log : `lsst.utils.logging.LsstLogAdapter`, optional
        Log object.

    Returns
    -------
    flatsAtExpFlux : `dict` [`float`,
            `list`[(`lsst.pipe.base.connections.DeferredDatasetRef`,
            `int`)]]
        Dictionary that groups references to flat-field exposures
        (and their IDs) that have the same flux.
    """
    flatsAtExpFlux = {}
    assert len(exposureList) == len(exposureIdList), "Different lengths for exp. list and exp. ID lists"
    for expRef, expId in zip(exposureList, exposureIdList):
        # Get flux from header, assuming it is in the metadata.
        try:
            expFlux = expRef.get().getMetadata()[fluxKeyword]
        except KeyError:
            # If it's missing from the header, continue; it will
            # be caught and rejected when pairing exposures.
            expFlux = None
        if expFlux is None:
            if log is not None:
                log.warning("Exposure %d does not have valid header keyword %s.", expId, fluxKeyword)
            expFlux = np.nan
        listAtExpFlux = flatsAtExpFlux.setdefault(expFlux, [])
        listAtExpFlux.append((expRef, expId))

    return flatsAtExpFlux


def arrangeFlatsByExpId(exposureList, exposureIdList):
    """Arrange exposures by exposure ID.

    There is no guarantee that this will properly group exposures, but
    allows a sequence of flats that have different illumination
    (despite having the same exposure time) to be processed.

    Parameters
    ----------
    exposureList : `list`[`lsst.pipe.base.connections.DeferredDatasetRef`]
        Input list of exposure references.
    exposureIdList : `list`[`int`]
        List of exposure ids as obtained by dataId[`exposure`].

    Returns
    -------
    flatsAtExpId : `dict` [`int`,
            `list`[(`lsst.pipe.base.connections.DeferredDatasetRef`,
            `int`)]]
        Dictionary that groups references to flat-field exposures (and their
        IDs) sequentially by their exposure id.

    Notes
    -----
    This algorithm sorts the input exposure references by their exposure
    id, and then assigns each pair of exposure references (exp_j, exp_{j+1})
    to pair k, such that 2*k = j, where j is the python index of one of the
    exposure references (starting from zero). By checking for the IndexError
    while appending, we can ensure that there will only ever be fully
    populated pairs.
    """
    flatsAtExpId = {}
    assert len(exposureList) == len(exposureIdList), "Different lengths for exp. list and exp. ID lists"
    # Sort exposures by expIds, which are in the second list `exposureIdList`.
    sortedExposures = sorted(zip(exposureList, exposureIdList), key=lambda pair: pair[1])

    for jPair, expTuple in enumerate(sortedExposures):
        if (jPair + 1) % 2:
            kPair = jPair // 2
            listAtExpId = flatsAtExpId.setdefault(kPair, [])
            try:
                listAtExpId.append(expTuple)
                listAtExpId.append(sortedExposures[jPair + 1])
            except IndexError:
                # A trailing unpaired exposure; discard it so that only
                # fully populated pairs are returned.
                del flatsAtExpId[kPair]

    return flatsAtExpId

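# Example (illustrative): the exposure references are not touched, so plain
# strings can stand in for them here. Exposures are sorted by ID and then
# paired off; a trailing unpaired exposure is dropped.
#
#     arrangeFlatsByExpId(['refB', 'refA', 'refD', 'refC'],
#                         [102, 101, 104, 103])
#     # {0: [('refA', 101), ('refB', 102)], 1: [('refC', 103), ('refD', 104)]}
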

class CovFastFourierTransform:
    """A class to compute (via FFT) the nearby pixels correlation function.

    Implements appendix of Astier+19.

    Parameters
    ----------
    diff : `numpy.array`
        Image where to calculate the covariances (e.g., the difference
        image of two flats).
    w : `numpy.array`
        Weight image (mask): it should consist of 1's (good pixel) and
        0's (bad pixels).
    fftShape : `tuple`
        2d-tuple with the shape of the FFT.
    maxRangeCov : `int`
        Maximum range for the covariances.
    """

    def __init__(self, diff, w, fftShape, maxRangeCov):
        # Check that the zero padding implied by "fftShape"
        # is large enough for the required correlation range.
        assert fftShape[0] > diff.shape[0]+maxRangeCov+1
        assert fftShape[1] > diff.shape[1]+maxRangeCov+1
        # The second dimension should be even for the
        # rfft2/irfft2 round trip, so pad by one if needed.
        if fftShape[1] % 2 == 1:
            fftShape = (fftShape[0], fftShape[1]+1)
        tIm = np.fft.rfft2(diff*w, fftShape)
        tMask = np.fft.rfft2(w, fftShape)
        # Sum of "squares".
        self.pCov = np.fft.irfft2(tIm*tIm.conjugate())
        # Sum of values.
        self.pMean = np.fft.irfft2(tIm*tMask.conjugate())
        # Number of w != 0 pixels.
        self.pCount = np.fft.irfft2(tMask*tMask.conjugate())

    def cov(self, dx, dy):
        """Covariance for dx,dy averaged with dx,-dy if both non-zero.

        Implements appendix of Astier+19.

        Parameters
        ----------
        dx : `int`
            Lag in x.
        dy : `int`
            Lag in y.

        Returns
        -------
        0.5*(cov1+cov2) : `float`
            Covariance at (dx, dy) lag.
        npix1+npix2 : `int`
            Number of pixels used in covariance calculation.

        Raises
        ------
        ValueError
            Raised if the number of pixels for a given lag is 0.
        """
        # Compensate rounding errors.
        nPix1 = int(round(self.pCount[dy, dx]))
        if nPix1 == 0:
            raise ValueError(f"Could not compute covariance term {dy}, {dx}, as there are no good pixels.")
        cov1 = self.pCov[dy, dx]/nPix1-self.pMean[dy, dx]*self.pMean[-dy, -dx]/(nPix1*nPix1)
        if (dx == 0 or dy == 0):
            return cov1, nPix1
        nPix2 = int(round(self.pCount[-dy, dx]))
        if nPix2 == 0:
            raise ValueError(f"Could not compute covariance term {dy}, {dx}, as there are no good pixels.")
        cov2 = self.pCov[-dy, dx]/nPix2-self.pMean[-dy, dx]*self.pMean[dy, -dx]/(nPix2*nPix2)
        return 0.5*(cov1+cov2), nPix1+nPix2

    def reportCovFastFourierTransform(self, maxRange):
        """Produce a list of tuples with covariances.

        Implements appendix of Astier+19.

        Parameters
        ----------
        maxRange : `int`
            Maximum range of covariances.

        Returns
        -------
        tupleVec : `list`
            List with covariance tuples.
        """
        tupleVec = []
        # (dy, dx) = (0, 0) has to be first.
        for dy in range(maxRange+1):
            for dx in range(maxRange+1):
                cov, npix = self.cov(dx, dy)
                if (dx == 0 and dy == 0):
                    var = cov
                tupleVec.append((dx, dy, var, cov, npix))
        return tupleVec

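# Example (illustrative; synthetic data): covariances of pure white noise,
# which should be close to zero everywhere except the (0, 0) variance term.
#
#     rng = np.random.RandomState(42)
#     diff = rng.normal(0.0, 1.0, (50, 50))
#     w = np.ones_like(diff)
#     maxRange = 8
#     fftShape = (diff.shape[0] + maxRange + 2, diff.shape[1] + maxRange + 2)
#     covFft = CovFastFourierTransform(diff, w, fftShape, maxRange)
#     tuples = covFft.reportCovFastFourierTransform(maxRange)
#     # tuples[0] is (0, 0, var, var, nPix) with var close to 1.0.
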

def getFitDataFromCovariances(i, j, mu, fullCov, fullCovModel, fullCovSqrtWeights, gain=1.0,
                              divideByMu=False, returnMasked=False):
    """Get measured signal and covariance, cov model, weights, and mask at
    covariance lag (i, j).

    Parameters
    ----------
    i : `int`
        Lag for covariance matrix.
    j : `int`
        Lag for covariance matrix.
    mu : `list`
        Mean signal values.
    fullCov : `list` of `numpy.array`
        Measured covariance matrices at each mean signal level in mu.
    fullCovModel : `list` of `numpy.array`
        List of modeled covariances at each mean signal level in mu.
    fullCovSqrtWeights : `list` of `numpy.array`
        List of square root of measured covariances at each mean
        signal level in mu.
    gain : `float`, optional
        Gain, in e-/ADU. If other than 1.0 (default), the returned
        quantities will be in electrons or powers of electrons.
    divideByMu : `bool`, optional
        Divide returned covariance, model, and weights by the mean
        signal mu?
    returnMasked : `bool`, optional
        Use mask (based on weights) in returned arrays (mu,
        covariance, and model)?

    Returns
    -------
    mu : `numpy.array`
        List of signal values at (i, j).
    covariance : `numpy.array`
        Covariance at (i, j) at each mean signal mu value (fullCov[:, i, j]).
    covarianceModel : `numpy.array`
        Covariance model at (i, j).
    weights : `numpy.array`
        Weights at (i, j).
    maskFromWeights : `numpy.array`
        Boolean mask of the covariance at (i, j), where the weights
        differ from 0.
    """
    mu = np.array(mu)
    fullCov = np.array(fullCov)
    fullCovModel = np.array(fullCovModel)
    fullCovSqrtWeights = np.array(fullCovSqrtWeights)
    covariance = fullCov[:, i, j]*(gain**2)
    covarianceModel = fullCovModel[:, i, j]*(gain**2)
    weights = fullCovSqrtWeights[:, i, j]/(gain**2)

    maskFromWeights = weights != 0
    if returnMasked:
        weights = weights[maskFromWeights]
        covarianceModel = covarianceModel[maskFromWeights]
        mu = mu[maskFromWeights]
        covariance = covariance[maskFromWeights]

    if divideByMu:
        covariance /= mu
        covarianceModel /= mu
        weights *= mu
    return mu, covariance, covarianceModel, weights, maskFromWeights

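# Example (illustrative; synthetic inputs): extract the (0, 0) variance
# slice from a stack of two 3x3 covariance matrices, dividing by mu.
#
#     mu = [100.0, 200.0]
#     fullCov = [np.full((3, 3), 1.0), np.full((3, 3), 2.0)]
#     fullCovModel = [np.full((3, 3), 1.1), np.full((3, 3), 2.1)]
#     fullCovSqrtWeights = [np.full((3, 3), 0.5), np.full((3, 3), 0.5)]
#     out = getFitDataFromCovariances(0, 0, mu, fullCov, fullCovModel,
#                                     fullCovSqrtWeights, divideByMu=True)
#     # out[1] is array([0.01, 0.01]): covariance divided by mu.
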

def symmetrize(inputArray):
    """Copy array over 4 quadrants prior to convolution.

    Parameters
    ----------
    inputArray : `numpy.array`
        Input array to symmetrize.

    Returns
    -------
    aSym : `numpy.array`
        Symmetrized array.
    """
    targetShape = list(inputArray.shape)
    r1, r2 = inputArray.shape[-1], inputArray.shape[-2]
    targetShape[-1] = 2*r1-1
    targetShape[-2] = 2*r2-1
    aSym = np.ndarray(tuple(targetShape))
    aSym[..., r2-1:, r1-1:] = inputArray
    aSym[..., r2-1:, r1-1::-1] = inputArray
    aSym[..., r2-1::-1, r1-1::-1] = inputArray
    aSym[..., r2-1::-1, r1-1:] = inputArray

    return aSym

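# Example (illustrative): a 2x2 array is mirrored about its first row and
# column, producing a 3x3 array with the (0, 0) element at the center.
#
#     symmetrize(np.array([[1., 2.],
#                          [3., 4.]]))
#     # array([[4., 3., 4.],
#     #        [2., 1., 2.],
#     #        [4., 3., 4.]])
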

def ddict2dict(d):
    """Convert nested default dictionaries to regular dictionaries.

    This is needed to prevent yaml persistence issues.

    Parameters
    ----------
    d : `defaultdict`
        A possibly nested set of `defaultdict`.

    Returns
    -------
    dict : `dict`
        A possibly nested set of `dict`.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            d[k] = ddict2dict(v)
    return dict(d)

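# Example (illustrative):
#
#     from collections import defaultdict
#     d = defaultdict(lambda: defaultdict(int))
#     d['amp']['count'] += 1
#     ddict2dict(d)
#     # {'amp': {'count': 1}}, with plain dicts at every level.
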

class AstierSplineLinearityFitter:
    """Class to fit the Astier spline linearity model.

    This is a spline fit with photodiode data based on a model
    from Pierre Astier, referenced in June 2023 from
    https://me.lsst.eu/astier/bot/7224D/model_nonlin.py

    This model fits a spline with (optional) nuisance parameters
    to allow for different linearity coefficients with different
    photodiode settings. The minimization is a least-squares
    fit with the residual of
    Sum[((mu_i - S(mu_i))/(k_j * D_i) - 1)**2], where S(mu_i) is
    an Akima Spline function of mu_i, the observed flat-pair
    mean; D_i is the photodiode measurement corresponding to
    that flat-pair; and k_j is a constant of proportionality
    which carries the index j because it is allowed to differ
    between photodiode settings (e.g. CCOBCURR).

    The fit has additional constraints to ensure that the spline
    goes through the (0, 0) point, as well as a normalization
    condition so that the average of the spline over the full
    range is 0. The normalization ensures that the spline only
    fits deviations from linearity, rather than the linear
    function itself, which is degenerate with the gain.

    Parameters
    ----------
    nodes : `np.ndarray` (N,)
        Array of spline node locations.
    grouping_values : `np.ndarray` (M,)
        Array of values used to group exposures that share a
        proportionality constant (e.g. CCOBCURR).
    pd : `np.ndarray` (M,)
        Array of photodiode measurements.
    mu : `np.ndarray` (M,)
        Array of flat mean values.
    mask : `np.ndarray` (M,), optional
        Input mask (True is good point, False is bad point).
    log : `logging.Logger`, optional
        Logger object to use for logging.
    """
    def __init__(self, nodes, grouping_values, pd, mu, mask=None, log=None):
        self._pd = pd
        self._mu = mu
        self._grouping_values = grouping_values
        self.log = log if log else logging.getLogger(__name__)

        self._nodes = nodes
        if nodes[0] != 0.0:
            raise ValueError("First node must be 0.0")
        if not np.all(np.diff(nodes) > 0):
            raise ValueError("Nodes must be sorted with no repeats.")

        # Check if sorted (raise otherwise).
        if not np.all(np.diff(self._grouping_values) >= 0):
            raise ValueError("Grouping values must be sorted.")

        _, uindex, ucounts = np.unique(self._grouping_values, return_index=True, return_counts=True)
        self.ngroup = len(uindex)

        self.group_indices = []
        for i in range(self.ngroup):
            self.group_indices.append(np.arange(uindex[i], uindex[i] + ucounts[i]))

        # Outlier weight values. Will be 1 (in) or 0 (out).
        self._w = np.ones(len(self._pd))

        if mask is not None:
            self._w[~mask] = 0.0

        # Values to regularize spline fit.
        self._x_regularize = np.linspace(0.0, self._mu[self.mask].max(), 100)

    def estimate_p0(self):
        """Estimate initial fit parameters.

        Returns
        -------
        p0 : `np.ndarray`
            Parameter array, with spline values (one for each node) followed
            by proportionality constants (one for each group).
        """
        npt = len(self._nodes) + self.ngroup
        p0 = np.zeros(npt)

        # Do a simple linear fit and set all the constants to this.
        linfit = np.polyfit(self._pd[self.mask], self._mu[self.mask], 1)
        p0[-self.ngroup:] = linfit[0]

        # Look at the residuals...
        ratio_model = self.compute_ratio_model(
            self._nodes,
            self.group_indices,
            p0,
            self._pd,
            self._mu,
        )
        # ...and adjust the linear parameters accordingly.
        p0[-self.ngroup:] *= np.median(ratio_model[self.mask])

        # Re-compute the residuals.
        ratio_model2 = self.compute_ratio_model(
            self._nodes,
            self.group_indices,
            p0,
            self._pd,
            self._mu,
        )

        # And compute a first guess of the spline nodes.
        bins = np.searchsorted(self._nodes, self._mu[self.mask])
        tot_arr = np.zeros(len(self._nodes))
        n_arr = np.zeros(len(self._nodes), dtype=int)
        np.add.at(tot_arr, bins, ratio_model2[self.mask])
        np.add.at(n_arr, bins, 1)

        ratio = np.ones(len(self._nodes))
        ratio[n_arr > 0] = tot_arr[n_arr > 0]/n_arr[n_arr > 0]
        ratio[0] = 1.0
        p0[0: len(self._nodes)] = (ratio - 1) * self._nodes

        return p0

    @staticmethod
    def compute_ratio_model(nodes, group_indices, pars, pd, mu, return_spline=False):
        """Compute the ratio model values.

        Parameters
        ----------
        nodes : `np.ndarray` (M,)
            Array of node positions.
        group_indices : `list` [`np.ndarray`]
            List of group indices, one array for each group.
        pars : `np.ndarray`
            Parameter array, with spline values (one for each node) followed
            by proportionality constants (one for each group).
        pd : `np.ndarray` (N,)
            Array of photodiode measurements.
        mu : `np.ndarray` (N,)
            Array of flat means.
        return_spline : `bool`, optional
            Return the spline interpolation as well as the model ratios?

        Returns
        -------
        ratio_models : `np.ndarray` (N,)
            Model ratio, (mu_i - S(mu_i))/(k_j * D_i).
        spl : `lsst.afw.math.Interpolate`
            Spline interpolator (returned if return_spline=True).
        """
        spl = lsst.afw.math.makeInterpolate(
            nodes,
            pars[0: len(nodes)],
            lsst.afw.math.stringToInterpStyle("AKIMA_SPLINE"),
        )

        numerator = mu - spl.interpolate(mu)
        denominator = pd.copy()
        ngroup = len(group_indices)
        kj = pars[-ngroup:]
        for j in range(ngroup):
            denominator[group_indices[j]] *= kj[j]

        if return_spline:
            return numerator / denominator, spl
        else:
            return numerator / denominator

    def fit(self, p0, min_iter=3, max_iter=20, max_rejection_per_iteration=5, n_sigma_clip=5.0):
        """Perform iterative fit for linear + spline model with offsets.

        Parameters
        ----------
        p0 : `np.ndarray`
            Initial fit parameters (one for each knot, followed by one for
            each grouping).
        min_iter : `int`, optional
            Minimum number of fit iterations.
        max_iter : `int`, optional
            Maximum number of fit iterations.
        max_rejection_per_iteration : `int`, optional
            Maximum number of points to reject per iteration.
        n_sigma_clip : `float`, optional
            Number of sigma to do clipping in each iteration.

        Returns
        -------
        params : `np.ndarray`
            Final fit parameters.
        """
        init_params = p0
        for k in range(max_iter):
            params, cov_params, _, msg, ierr = leastsq(
                self,
                init_params,
                full_output=True,
                ftol=1e-5,
                maxfev=12000,
            )
            init_params = params.copy()

            # We need to cut off the constraints at the end (there are more
            # residuals than data points.)
            res = self(params)[: len(self._w)]
            std_res = median_abs_deviation(res[self.good_points], scale="normal")
            sample = len(self.good_points)

            # We don't want to reject too many outliers at once.
            if sample > max_rejection_per_iteration:
                sres = np.sort(np.abs(res))
                cut = max(sres[-max_rejection_per_iteration], std_res*n_sigma_clip)
            else:
                cut = std_res*n_sigma_clip

            outliers = np.abs(res) > cut
            self._w[outliers] = 0
            if outliers.sum() != 0:
                self.log.info(
                    "After iteration %d there are %d outliers (of %d).",
                    k,
                    outliers.sum(),
                    sample,
                )
            elif k >= min_iter:
                self.log.info("After iteration %d there are no more outliers.", k)
                break

        return params

    @property
    def mask(self):
        return (self._w > 0)

    @property
    def good_points(self):
        return self.mask.nonzero()[0]

    def __call__(self, pars):
        ratio_model, spl = self.compute_ratio_model(
            self._nodes,
            self.group_indices,
            pars,
            self._pd,
            self._mu,
            return_spline=True,
        )

        resid = self._w*(ratio_model - 1.0)
        # Ensure masked points have 0 residual.
        resid[~self.mask] = 0.0

        constraint = [1e3 * np.mean(spl.interpolate(self._x_regularize))]
        # 0 should transform to 0.
        constraint.append(spl.interpolate(0)*1e10)

        return np.hstack([resid, constraint])
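

# Example (illustrative sketch; synthetic data, requires lsst.afw.math): fit
# the spline linearity model to a mildly non-linear response with a single
# photodiode grouping. All numbers below are made up for demonstration.
#
#     nodes = np.linspace(0.0, 1.0e5, 5)
#     pd = np.linspace(1.0e-6, 1.0e-3, 100)
#     mu = 1.0e8*pd*(1.0 - pd/1.0e-1)  # ~1% non-linearity at the top end
#     grouping = np.zeros(100)
#     fitter = AstierSplineLinearityFitter(nodes, grouping, pd, mu)
#     pars = fitter.fit(fitter.estimate_p0())
#     # pars holds the spline values at the nodes, then the k_j constants.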