lsst.jointcal  16.0-4-g128aaef
FitterBase.cc
Go to the documentation of this file.
1 #include <vector>
2 #include "Eigen/Core"
3 
4 #include <boost/math/tools/minima.hpp>
5 
6 #include "lsst/log/Log.h"
7 
8 #include "lsst/jointcal/Chi2.h"
14 
15 namespace lsst {
16 namespace jointcal {
17 
19  Chi2Statistic chi2;
20  accumulateStatImageList(_associations->getCcdImageList(), chi2);
22  // chi2.ndof contains the number of squares.
23  // So subtract the number of parameters.
24  chi2.ndof -= _nParTot;
25  return chi2;
26 }
27 
28 unsigned FitterBase::findOutliers(double nSigmaCut, MeasuredStarList &msOutliers,
29  FittedStarList &fsOutliers) const {
30  // collect chi2 contributions
31  Chi2List chi2List;
32  chi2List.reserve(_nMeasuredStars + _associations->refStarList.size());
33  // contributions from measurement terms:
34  accumulateStatImageList(_associations->ccdImageList, chi2List);
35  // and from reference terms
36  accumulateStatRefStars(chi2List);
37 
38  // compute some statistics
39  size_t nval = chi2List.size();
40  if (nval == 0) return 0;
41  sort(chi2List.begin(), chi2List.end());
42  double median = (nval & 1) ? chi2List[nval / 2].chi2
43  : 0.5 * (chi2List[nval / 2 - 1].chi2 + chi2List[nval / 2].chi2);
44  auto averageAndSigma = chi2List.computeAverageAndSigma();
45  LOGLS_DEBUG(_log, "RemoveOutliers chi2 stat: mean/median/sigma " << averageAndSigma.first << '/' << median
46  << '/' << averageAndSigma.second);
47  double cut = averageAndSigma.first + nSigmaCut * averageAndSigma.second;
48  /* For each of the parameters, we will not remove more than 1
49  measurement that contributes to constraining it. Keep track using
50  of what we are touching using an integer vector. This is the
51  trick that Marc Betoule came up to for outlier removals in "star
52  flats" fits. */
53  Eigen::VectorXi affectedParams(_nParTot);
54  affectedParams.setZero();
55 
56  unsigned nOutliers = 0; // returned to the caller
57  // start from the strongest outliers.
58  for (auto chi2 = chi2List.rbegin(); chi2 != chi2List.rend(); ++chi2) {
59  if (chi2->chi2 < cut) break; // because the array is sorted.
60  std::vector<unsigned> indices;
61  /* now, we want to get the indices of the parameters this chi2
62  term depends on. We have to figure out which kind of term it
63  is; we use for that the type of the star attached to the Chi2Star. */
64  auto measuredStar = std::dynamic_pointer_cast<MeasuredStar>(chi2->star);
65  std::shared_ptr<FittedStar> fittedStar; // To add to fsOutliers if it is a reference outlier.
66  if (measuredStar == nullptr) {
67  // it is a reference outlier
68  fittedStar = std::dynamic_pointer_cast<FittedStar>(chi2->star);
69  if (fittedStar->getMeasurementCount() == 0) {
70  LOGLS_WARN(_log, "FittedStar with no measuredStars found as an outlier: " << *fittedStar);
71  continue;
72  }
73  // NOTE: Stars contribute twice to astrometry (x,y), but once to photometry (flux),
74  // NOTE: but we only need to mark one index here because both will be removed with that star.
75  indices.push_back(fittedStar->getIndexInMatrix());
76  /* One might think it would be useful to account for PM
77  parameters here, but it is just useless */
78  } else {
79  // it is a measurement outlier
80  auto tempFittedStar = measuredStar->getFittedStar();
81  if (tempFittedStar->getMeasurementCount() == 1 && tempFittedStar->getRefStar() == nullptr) {
82  LOGLS_WARN(_log, "FittedStar with 1 measuredStar and no refStar found as an outlier: "
83  << *tempFittedStar);
84  continue;
85  }
86  getIndicesOfMeasuredStar(*measuredStar, indices);
87  }
88 
89  /* Find out if we already discarded a stronger outlier
90  constraining some parameter this one constrains as well. If
91  yes, we keep this one, because this stronger outlier could be
92  causing the large chi2 we have in hand. */
93  bool drop_it = true;
94  for (auto const &i : indices) {
95  if (affectedParams(i) != 0) {
96  drop_it = false;
97  }
98  }
99 
100  if (drop_it) // store the outlier in one of the lists:
101  {
102  if (measuredStar == nullptr) {
103  // reference term
104  fsOutliers.push_back(fittedStar);
105  } else {
106  // measurement term
107  msOutliers.push_back(measuredStar);
108  }
109  // mark the parameters as directly changed when we discard this chi2 term.
110  for (auto const &i : indices) {
111  affectedParams(i)++;
112  }
113  nOutliers++;
114  }
115  } // end loop on measurements/references
116  LOGLS_INFO(_log, "findOutliers: found " << msOutliers.size() << " meas outliers and " << fsOutliers.size()
117  << " ref outliers ");
118 
119  return nOutliers;
120 }
121 
122 namespace {
124 SparseMatrixD createHessian(int nParTot, TripletList const &tripletList) {
125  SparseMatrixD jacobian(nParTot, tripletList.getNextFreeIndex());
126  jacobian.setFromTriplets(tripletList.begin(), tripletList.end());
127  return jacobian * jacobian.transpose();
128 }
129 
131 void dumpMatrixAndGradient(SparseMatrixD const &matrix, Eigen::VectorXd const &grad,
132  std::string const &dumpFile, LOG_LOGGER _log) {
133  std::string ext = ".txt";
134  Eigen::MatrixXd matrixDense(matrix);
135  std::string dumpMatrixPath = dumpFile + "-mat" + ext;
136  std::ofstream matfile(dumpMatrixPath);
137  matfile << matrixDense << std::endl;
138  std::string dumpGradPath = dumpFile + "-grad" + ext;
139  std::ofstream gradfile(dumpGradPath);
140  gradfile << grad << std::endl;
141  LOGLS_INFO(_log, "Dumped Hessian, gradient to: '" << dumpMatrixPath << "', '" << dumpGradPath << "'");
142 }
143 } // namespace
144 
145 MinimizeResult FitterBase::minimize(std::string const &whatToFit, double nSigmaCut, bool doRankUpdate,
146  bool const doLineSearch, std::string const &dumpMatrixFile) {
147  assignIndices(whatToFit);
148 
150 
151  // TODO : write a guesser for the number of triplets
152  unsigned nTrip = (_lastNTrip) ? _lastNTrip : 1e6;
153  TripletList tripletList(nTrip);
154  Eigen::VectorXd grad(_nParTot);
155  grad.setZero();
156  double scale = 1.0;
157 
158  // Fill the triplets
159  leastSquareDerivatives(tripletList, grad);
160  _lastNTrip = tripletList.size();
161 
162  LOGLS_DEBUG(_log, "End of triplet filling, ntrip = " << tripletList.size());
163 
164  SparseMatrixD hessian = createHessian(_nParTot, tripletList);
165  tripletList.clear(); // we don't need it any more after we have the hessian.
166 
167  LOGLS_DEBUG(_log, "Starting factorization, hessian: dim="
168  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
169  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
170 
171  if (dumpMatrixFile != "") {
172  if (hessian.rows() * hessian.cols() > 2e8) {
173  LOGLS_WARN(_log, "Hessian matrix is too big to dump to file, with rows, columns: "
174  << hessian.rows() << ", " << hessian.cols());
175  } else {
176  dumpMatrixAndGradient(hessian, grad, dumpMatrixFile, _log);
177  }
178  }
179 
181  if (chol.info() != Eigen::Success) {
182  LOGLS_ERROR(_log, "minimize: factorization failed ");
183  return MinimizeResult::Failed;
184  }
185 
186  unsigned totalMeasOutliers = 0;
187  unsigned totalRefOutliers = 0;
188  double oldChi2 = computeChi2().chi2;
189 
190  while (true) {
191  Eigen::VectorXd delta = chol.solve(grad);
192  if (doLineSearch) {
193  scale = _lineSearch(delta);
194  }
195  offsetParams(scale * delta);
196  Chi2Statistic currentChi2(computeChi2());
197  LOGLS_DEBUG(_log, currentChi2);
198  if (currentChi2.chi2 > oldChi2 && totalMeasOutliers + totalRefOutliers != 0) {
199  LOGL_WARN(_log, "chi2 went up, skipping outlier rejection loop");
200  returnCode = MinimizeResult::Chi2Increased;
201  break;
202  }
203  oldChi2 = currentChi2.chi2;
204 
205  if (nSigmaCut == 0) break; // no rejection step to perform
206  MeasuredStarList msOutliers;
207  FittedStarList fsOutliers;
208  // keep nOutliers so we don't have to sum msOutliers.size()+fsOutliers.size() twice below.
209  int nOutliers = findOutliers(nSigmaCut, msOutliers, fsOutliers);
210  totalMeasOutliers += msOutliers.size();
211  totalRefOutliers += fsOutliers.size();
212  if (nOutliers == 0) break;
213  TripletList outlierTriplets(nOutliers);
214  grad.setZero(); // recycle the gradient
215  // compute the contributions of outliers to derivatives
216  outliersContributions(msOutliers, fsOutliers, outlierTriplets, grad);
217  // Remove significant outliers
218  removeMeasOutliers(msOutliers);
219  removeRefOutliers(fsOutliers);
220  if (doRankUpdate) {
221  // convert triplet list to eigen internal format
222  SparseMatrixD H(_nParTot, outlierTriplets.getNextFreeIndex());
223  H.setFromTriplets(outlierTriplets.begin(), outlierTriplets.end());
224  chol.update(H, false /* means downdate */);
225  // The contribution of outliers to the gradient is the opposite
226  // of the contribution of all other terms, because they add up to 0
227  grad *= -1;
228  } else {
229  // don't reuse tripletList because we want a new nextFreeIndex.
230  TripletList nextTripletList(_lastNTrip);
231  grad.setZero();
232  // Rebuild the matrix and gradient
233  leastSquareDerivatives(nextTripletList, grad);
234  _lastNTrip = nextTripletList.size();
235  LOGLS_DEBUG(_log, "Triplets recomputed, ntrip = " << nextTripletList.size());
236 
237  hessian = createHessian(_nParTot, nextTripletList);
238  nextTripletList.clear(); // we don't need it any more after we have the hessian.
239 
241  "Restarting factorization, hessian: dim="
242  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
243  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
244  chol.compute(hessian);
245  if (chol.info() != Eigen::Success) {
246  LOGLS_ERROR(_log, "minimize: factorization failed ");
247  return MinimizeResult::Failed;
248  }
249  }
250  }
251 
252  // only print the outlier summary if outlier rejection was turned on.
253  if (nSigmaCut != 0) {
254  LOGLS_INFO(_log, "Number of outliers (Measured + Reference = Total): "
255  << totalMeasOutliers << " + " << totalRefOutliers << " = "
256  << totalMeasOutliers + totalRefOutliers);
257  }
258  return returnCode;
259 }
260 
262  TripletList &tripletList, Eigen::VectorXd &grad) {
263  for (auto &outlier : msOutliers) {
264  MeasuredStarList tmp;
265  tmp.push_back(outlier);
266  const CcdImage &ccdImage = outlier->getCcdImage();
267  leastSquareDerivativesMeasurement(ccdImage, tripletList, grad, &tmp);
268  }
269  leastSquareDerivativesReference(fsOutliers, tripletList, grad);
270 }
271 
273  for (auto &measuredStar : outliers) {
274  auto fittedStar = measuredStar->getFittedStar();
275  measuredStar->setValid(false);
276  fittedStar->getMeasurementCount()--; // could be put in setValid
277  }
278 }
279 
281  for (auto &fittedStar : outliers) {
282  fittedStar->setRefStar(nullptr);
283  }
284 }
285 
286 void FitterBase::leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const {
287  auto ccdImageList = _associations->getCcdImageList();
288  for (auto const &ccdImage : ccdImageList) {
289  leastSquareDerivativesMeasurement(*ccdImage, tripletList, grad);
290  }
291  leastSquareDerivativesReference(_associations->fittedStarList, tripletList, grad);
292 }
293 
294 void FitterBase::saveChi2Contributions(std::string const &baseName) const {
295  /* cook-up 2 different file names by inserting something just before
296  the dot (if any), and within the actual file name. */
297  size_t dot = baseName.rfind('.');
298  size_t slash = baseName.rfind('/');
299  if (dot == std::string::npos || (slash != std::string::npos && dot < slash)) dot = baseName.size();
300  std::string measTuple(baseName);
301  measTuple.insert(dot, "-meas");
302  saveChi2MeasContributions(measTuple);
303  std::string refTuple(baseName);
304  refTuple.insert(dot, "-ref");
305  saveChi2RefContributions(refTuple);
306 }
307 
308 double FitterBase::_lineSearch(Eigen::VectorXd const &delta) {
309  auto func = [this, &delta](double scale) {
310  auto offset = scale * delta;
311  offsetParams(offset);
312  auto chi2 = computeChi2();
313  // reset the system to where it was before offsetting.
314  offsetParams(-offset);
315  return chi2.chi2;
316  };
317  // The maximum theoretical precision is half the number of bits in the mantissa (see boost docs).
318  auto bits = std::numeric_limits<double>::digits / 2;
319  auto result = boost::math::tools::brent_find_minima(func, -1.0, 2.0, bits);
320  LOGLS_DEBUG(_log, "Line search scale factor: " << result.first);
321  return result.first;
322 }
323 
324 } // namespace jointcal
325 } // namespace lsst
#define LOGLS_WARN(logger, message)
virtual void accumulateStatRefStars(Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for RefStars...
virtual void leastSquareDerivativesReference(FittedStarList const &fittedStarList, TripletList &tripletList, Eigen::VectorXd &grad) const =0
Compute the derivatives of the reference terms.
Simple structure to accumulate chi2 and ndof.
Definition: Chi2.h:29
Eigen::SparseMatrix< double > SparseMatrixD
Definition: Eigenstuff.h:12
virtual void accumulateStatImageList(CcdImageList const &ccdImageList, Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for measurements...
unsigned findOutliers(double nSigmaCut, MeasuredStarList &msOutliers, FittedStarList &fsOutliers) const
Find Measurements and references contributing more than a cut, computed as The outliers are NOT remo...
Definition: FitterBase.cc:28
T rend(T... args)
def scale(algorithm, min, max=None, frame=None)
T rfind(T... args)
T endl(T... args)
virtual void getIndicesOfMeasuredStar(MeasuredStar const &measuredStar, std::vector< unsigned > &indices) const =0
Set the indices of a measured star from the full matrix, for outlier removal.
T end(T... args)
void removeMeasOutliers(MeasuredStarList &outliers)
Remove measuredStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:272
void update(SparseMatrixD const &H, bool UpOrDown)
Definition: Eigenstuff.h:42
A list of MeasuredStar. They are usually filled in Associations::createCcdImage.
Definition: MeasuredStar.h:113
virtual void saveChi2Contributions(std::string const &baseName) const
Save the full chi2 term per star that was used in the minimization, for debugging.
Definition: FitterBase.cc:294
MinimizeResult
Return value of minimize()
Definition: FitterBase.h:17
def dot(symb, c, r, frame=None, size=2, ctype=None, origin=afwImage.PARENT, args, kwargs)
STL class.
MinimizeResult minimize(std::string const &whatToFit, double const nSigmaCut=0, bool const doRankUpdate=true, bool const doLineSearch=false, std::string const &dumpMatrixFile="")
Does a 1 step minimization, assuming a linear model.
Definition: FitterBase.cc:145
virtual void saveChi2MeasContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of measurement terms.
Structure to accumulate the chi2 contributions per each star (to help find outliers).
Definition: Chi2.h:77
T push_back(T... args)
STL class.
#define LOGLS_DEBUG(logger, message)
Class for a simple mapping implementing a generic Gtransfo.
A list of FittedStar s. Such a list is typically constructed by Associations.
Definition: FittedStar.h:121
void removeRefOutliers(FittedStarList &outliers)
Remove refStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:280
objects measured on actual images.
Definition: MeasuredStar.h:19
T dynamic_pointer_cast(T... args)
#define LOGL_WARN(logger, message...)
T clear(T... args)
virtual void saveChi2RefContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of reference terms.
std::shared_ptr< Associations > _associations
Definition: FitterBase.h:135
virtual void assignIndices(std::string const &whatToFit)=0
Set parameters to fit and assign indices in the big matrix.
std::unique_ptr< SchemaItem< U > > result
#define LOGLS_INFO(logger, message)
T insert(T... args)
T size(T... args)
Chi2Statistic computeChi2() const
Returns the chi2 for the current state.
Definition: FitterBase.cc:18
T begin(T... args)
T pow(T... args)
void leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const
Evaluates the chI^2 derivatives (Jacobian and gradient) for the current whatToFit setting...
Definition: FitterBase.cc:286
T sort(T... args)
Handler of an actual image from a single CCD.
Definition: CcdImage.h:41
virtual void offsetParams(Eigen::VectorXd const &delta)=0
Offset the parameters by the requested quantities.
#define LOGLS_ERROR(logger, message)
unsigned getNextFreeIndex() const
Definition: Tripletlist.h:22
void outliersContributions(MeasuredStarList &msOutliers, FittedStarList &fsOutliers, TripletList &tripletList, Eigen::VectorXd &grad)
Contributions to derivatives from (presumably) outlier terms.
Definition: FitterBase.cc:261
The objects which have been measured several times.
Definition: FittedStar.h:37
std::pair< double, double > computeAverageAndSigma()
Compute the average and std-deviation of these chisq values.
Definition: Chi2.cc:31
T reserve(T... args)
virtual void leastSquareDerivativesMeasurement(CcdImage const &ccdImage, TripletList &tripletList, Eigen::VectorXd &grad, MeasuredStarList const *measuredStarList=nullptr) const =0
Compute the derivatives of the measured stars and model for one CcdImage.
T rbegin(T... args)