lsst.jointcal  16.0-16-g925333c+2
FitterBase.cc
Go to the documentation of this file.
1 #include <vector>
2 #include "Eigen/Core"
3 
4 #include <boost/math/tools/minima.hpp>
5 
6 #include "lsst/log/Log.h"
7 
8 #include "lsst/jointcal/Chi2.h"
14 
15 namespace lsst {
16 namespace jointcal {
17 
19  Chi2Statistic chi2;
20  accumulateStatImageList(_associations->getCcdImageList(), chi2);
22  // chi2.ndof contains the number of squares.
23  // So subtract the number of parameters.
24  chi2.ndof -= _nParTot;
25  return chi2;
26 }
27 
28 unsigned FitterBase::findOutliers(double nSigmaCut, MeasuredStarList &msOutliers,
29  FittedStarList &fsOutliers) const {
30  // collect chi2 contributions
31  Chi2List chi2List;
32  chi2List.reserve(_nMeasuredStars + _associations->refStarList.size());
33  // contributions from measurement terms:
34  accumulateStatImageList(_associations->ccdImageList, chi2List);
35  // and from reference terms
36  accumulateStatRefStars(chi2List);
37 
38  // compute some statistics
39  size_t nval = chi2List.size();
40  if (nval == 0) return 0;
41  sort(chi2List.begin(), chi2List.end());
42  double median = (nval & 1) ? chi2List[nval / 2].chi2
43  : 0.5 * (chi2List[nval / 2 - 1].chi2 + chi2List[nval / 2].chi2);
44  auto averageAndSigma = chi2List.computeAverageAndSigma();
45  LOGLS_DEBUG(_log, "RemoveOutliers chi2 stat: mean/median/sigma " << averageAndSigma.first << '/' << median
46  << '/' << averageAndSigma.second);
47  double cut = averageAndSigma.first + nSigmaCut * averageAndSigma.second;
48  /* For each of the parameters, we will not remove more than 1
49  measurement that contributes to constraining it. Keep track using
50  of what we are touching using an integer vector. This is the
51  trick that Marc Betoule came up to for outlier removals in "star
52  flats" fits. */
53  Eigen::VectorXi affectedParams(_nParTot);
54  affectedParams.setZero();
55 
56  unsigned nOutliers = 0; // returned to the caller
57  // start from the strongest outliers.
58  for (auto chi2 = chi2List.rbegin(); chi2 != chi2List.rend(); ++chi2) {
59  if (chi2->chi2 < cut) break; // because the array is sorted.
60  std::vector<unsigned> indices;
61  /* now, we want to get the indices of the parameters this chi2
62  term depends on. We have to figure out which kind of term it
63  is; we use for that the type of the star attached to the Chi2Star. */
64  auto measuredStar = std::dynamic_pointer_cast<MeasuredStar>(chi2->star);
65  std::shared_ptr<FittedStar> fittedStar; // To add to fsOutliers if it is a reference outlier.
66  if (measuredStar == nullptr) {
67  // it is a reference outlier
68  fittedStar = std::dynamic_pointer_cast<FittedStar>(chi2->star);
69  if (fittedStar->getMeasurementCount() == 0) {
70  LOGLS_WARN(_log, "FittedStar with no measuredStars found as an outlier: " << *fittedStar);
71  continue;
72  }
73  // NOTE: Stars contribute twice to astrometry (x,y), but once to photometry (flux),
74  // NOTE: but we only need to mark one index here because both will be removed with that star.
75  indices.push_back(fittedStar->getIndexInMatrix());
76  /* One might think it would be useful to account for PM
77  parameters here, but it is just useless */
78  } else {
79  // it is a measurement outlier
80  auto tempFittedStar = measuredStar->getFittedStar();
81  if (tempFittedStar->getMeasurementCount() == 1 && tempFittedStar->getRefStar() == nullptr) {
82  LOGLS_WARN(_log, "FittedStar with 1 measuredStar and no refStar found as an outlier: "
83  << *tempFittedStar);
84  continue;
85  }
86  getIndicesOfMeasuredStar(*measuredStar, indices);
87  }
88 
89  /* Find out if we already discarded a stronger outlier
90  constraining some parameter this one constrains as well. If
91  yes, we keep this one, because this stronger outlier could be
92  causing the large chi2 we have in hand. */
93  bool drop_it = true;
94  for (auto const &i : indices) {
95  if (affectedParams(i) != 0) {
96  drop_it = false;
97  }
98  }
99 
100  if (drop_it) // store the outlier in one of the lists:
101  {
102  if (measuredStar == nullptr) {
103  // reference term
104  fsOutliers.push_back(fittedStar);
105  } else {
106  // measurement term
107  msOutliers.push_back(measuredStar);
108  }
109  // mark the parameters as directly changed when we discard this chi2 term.
110  for (auto const &i : indices) {
111  affectedParams(i)++;
112  }
113  nOutliers++;
114  }
115  } // end loop on measurements/references
116  LOGLS_INFO(_log, "findOutliers: found " << msOutliers.size() << " meas outliers and " << fsOutliers.size()
117  << " ref outliers ");
118 
119  return nOutliers;
120 }
121 
122 namespace {
124 SparseMatrixD createHessian(int nParTot, TripletList const &tripletList) {
125  SparseMatrixD jacobian(nParTot, tripletList.getNextFreeIndex());
126  jacobian.setFromTriplets(tripletList.begin(), tripletList.end());
127  return jacobian * jacobian.transpose();
128 }
129 
131 void dumpMatrixAndGradient(SparseMatrixD const &matrix, Eigen::VectorXd const &grad,
132  std::string const &dumpFile, LOG_LOGGER _log) {
133  std::string ext = ".txt";
134  Eigen::MatrixXd matrixDense(matrix);
135  std::string dumpMatrixPath = dumpFile + "-mat" + ext;
136  std::ofstream matfile(dumpMatrixPath);
137  matfile << matrixDense << std::endl;
138  std::string dumpGradPath = dumpFile + "-grad" + ext;
139  std::ofstream gradfile(dumpGradPath);
140  gradfile << grad << std::endl;
141  LOGLS_INFO(_log, "Dumped Hessian, gradient to: '" << dumpMatrixPath << "', '" << dumpGradPath << "'");
142 }
143 } // namespace
144 
145 MinimizeResult FitterBase::minimize(std::string const &whatToFit, double nSigmaCut, bool doRankUpdate,
146  bool const doLineSearch, std::string const &dumpMatrixFile) {
147  assignIndices(whatToFit);
148 
150 
151  // TODO : write a guesser for the number of triplets
152  unsigned nTrip = (_lastNTrip) ? _lastNTrip : 1e6;
153  TripletList tripletList(nTrip);
154  Eigen::VectorXd grad(_nParTot);
155  grad.setZero();
156  double scale = 1.0;
157 
158  // Fill the triplets
159  leastSquareDerivatives(tripletList, grad);
160  _lastNTrip = tripletList.size();
161 
162  LOGLS_DEBUG(_log, "End of triplet filling, ntrip = " << tripletList.size());
163 
164  SparseMatrixD hessian = createHessian(_nParTot, tripletList);
165  tripletList.clear(); // we don't need it any more after we have the hessian.
166 
167  LOGLS_DEBUG(_log, "Starting factorization, hessian: dim="
168  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
169  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
170 
171  if (dumpMatrixFile != "") {
172  if (hessian.rows() * hessian.cols() > 2e8) {
173  LOGLS_WARN(_log, "Hessian matrix is too big to dump to file, with rows, columns: "
174  << hessian.rows() << ", " << hessian.cols());
175  } else {
176  dumpMatrixAndGradient(hessian, grad, dumpMatrixFile, _log);
177  }
178  }
179 
181  if (chol.info() != Eigen::Success) {
182  LOGLS_ERROR(_log, "minimize: factorization failed ");
183  return MinimizeResult::Failed;
184  }
185 
186  unsigned totalMeasOutliers = 0;
187  unsigned totalRefOutliers = 0;
188  double oldChi2 = computeChi2().chi2;
189 
190  while (true) {
191  Eigen::VectorXd delta = chol.solve(grad);
192  if (doLineSearch) {
193  scale = _lineSearch(delta);
194  }
195  offsetParams(scale * delta);
196  Chi2Statistic currentChi2(computeChi2());
197  LOGLS_DEBUG(_log, currentChi2);
198  if (!isfinite(currentChi2.chi2)) {
199  LOGL_ERROR(_log, "chi2 is not finite. Aborting outlier rejection.");
200  returnCode = MinimizeResult::NonFinite;
201  break;
202  }
203  if (currentChi2.chi2 > oldChi2 && totalMeasOutliers + totalRefOutliers != 0) {
204  LOGL_WARN(_log, "chi2 went up, skipping outlier rejection loop");
205  returnCode = MinimizeResult::Chi2Increased;
206  break;
207  }
208  oldChi2 = currentChi2.chi2;
209 
210  if (nSigmaCut == 0) break; // no rejection step to perform
211  MeasuredStarList msOutliers;
212  FittedStarList fsOutliers;
213  // keep nOutliers so we don't have to sum msOutliers.size()+fsOutliers.size() twice below.
214  int nOutliers = findOutliers(nSigmaCut, msOutliers, fsOutliers);
215  totalMeasOutliers += msOutliers.size();
216  totalRefOutliers += fsOutliers.size();
217  if (nOutliers == 0) break;
218  TripletList outlierTriplets(nOutliers);
219  grad.setZero(); // recycle the gradient
220  // compute the contributions of outliers to derivatives
221  outliersContributions(msOutliers, fsOutliers, outlierTriplets, grad);
222  // Remove significant outliers
223  removeMeasOutliers(msOutliers);
224  removeRefOutliers(fsOutliers);
225  if (doRankUpdate) {
226  // convert triplet list to eigen internal format
227  SparseMatrixD H(_nParTot, outlierTriplets.getNextFreeIndex());
228  H.setFromTriplets(outlierTriplets.begin(), outlierTriplets.end());
229  chol.update(H, false /* means downdate */);
230  // The contribution of outliers to the gradient is the opposite
231  // of the contribution of all other terms, because they add up to 0
232  grad *= -1;
233  } else {
234  // don't reuse tripletList because we want a new nextFreeIndex.
235  TripletList nextTripletList(_lastNTrip);
236  grad.setZero();
237  // Rebuild the matrix and gradient
238  leastSquareDerivatives(nextTripletList, grad);
239  _lastNTrip = nextTripletList.size();
240  LOGLS_DEBUG(_log, "Triplets recomputed, ntrip = " << nextTripletList.size());
241 
242  hessian = createHessian(_nParTot, nextTripletList);
243  nextTripletList.clear(); // we don't need it any more after we have the hessian.
244 
246  "Restarting factorization, hessian: dim="
247  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
248  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
249  chol.compute(hessian);
250  if (chol.info() != Eigen::Success) {
251  LOGLS_ERROR(_log, "minimize: factorization failed ");
252  return MinimizeResult::Failed;
253  }
254  }
255  }
256 
257  // only print the outlier summary if outlier rejection was turned on.
258  if (nSigmaCut != 0) {
259  LOGLS_INFO(_log, "Number of outliers (Measured + Reference = Total): "
260  << totalMeasOutliers << " + " << totalRefOutliers << " = "
261  << totalMeasOutliers + totalRefOutliers);
262  }
263  return returnCode;
264 }
265 
267  TripletList &tripletList, Eigen::VectorXd &grad) {
268  for (auto &outlier : msOutliers) {
269  MeasuredStarList tmp;
270  tmp.push_back(outlier);
271  const CcdImage &ccdImage = outlier->getCcdImage();
272  leastSquareDerivativesMeasurement(ccdImage, tripletList, grad, &tmp);
273  }
274  leastSquareDerivativesReference(fsOutliers, tripletList, grad);
275 }
276 
278  for (auto &measuredStar : outliers) {
279  auto fittedStar = measuredStar->getFittedStar();
280  measuredStar->setValid(false);
281  fittedStar->getMeasurementCount()--; // could be put in setValid
282  }
283 }
284 
286  for (auto &fittedStar : outliers) {
287  fittedStar->setRefStar(nullptr);
288  }
289 }
290 
291 void FitterBase::leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const {
292  auto ccdImageList = _associations->getCcdImageList();
293  for (auto const &ccdImage : ccdImageList) {
294  leastSquareDerivativesMeasurement(*ccdImage, tripletList, grad);
295  }
296  leastSquareDerivativesReference(_associations->fittedStarList, tripletList, grad);
297 }
298 
299 void FitterBase::saveChi2Contributions(std::string const &baseName) const {
300  /* cook-up 2 different file names by inserting something just before
301  the dot (if any), and within the actual file name. */
302  size_t dot = baseName.rfind('.');
303  size_t slash = baseName.rfind('/');
304  if (dot == std::string::npos || (slash != std::string::npos && dot < slash)) dot = baseName.size();
305  std::string measTuple(baseName);
306  measTuple.insert(dot, "-meas");
307  saveChi2MeasContributions(measTuple);
308  std::string refTuple(baseName);
309  refTuple.insert(dot, "-ref");
310  saveChi2RefContributions(refTuple);
311 }
312 
313 double FitterBase::_lineSearch(Eigen::VectorXd const &delta) {
314  auto func = [this, &delta](double scale) {
315  auto offset = scale * delta;
316  offsetParams(offset);
317  auto chi2 = computeChi2();
318  // reset the system to where it was before offsetting.
319  offsetParams(-offset);
320  return chi2.chi2;
321  };
322  // The maximum theoretical precision is half the number of bits in the mantissa (see boost docs).
323  auto bits = std::numeric_limits<double>::digits / 2;
324  auto result = boost::math::tools::brent_find_minima(func, -1.0, 2.0, bits);
325  LOGLS_DEBUG(_log, "Line search scale factor: " << result.first);
326  return result.first;
327 }
328 
329 } // namespace jointcal
330 } // namespace lsst
#define LOGL_ERROR(logger, message...)
virtual void accumulateStatRefStars(Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for RefStars...
virtual void leastSquareDerivativesReference(FittedStarList const &fittedStarList, TripletList &tripletList, Eigen::VectorXd &grad) const =0
Compute the derivatives of the reference terms.
Simple structure to accumulate chi2 and ndof.
Definition: Chi2.h:29
Eigen::SparseMatrix< double > SparseMatrixD
Definition: Eigenstuff.h:12
virtual void accumulateStatImageList(CcdImageList const &ccdImageList, Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for measurements...
unsigned findOutliers(double nSigmaCut, MeasuredStarList &msOutliers, FittedStarList &fsOutliers) const
Find Measurements and references contributing more than a cut, computed as The outliers are NOT remo...
Definition: FitterBase.cc:28
T rend(T... args)
def scale(algorithm, min, max=None, frame=None)
T rfind(T... args)
T endl(T... args)
virtual void getIndicesOfMeasuredStar(MeasuredStar const &measuredStar, std::vector< unsigned > &indices) const =0
Set the indices of a measured star from the full matrix, for outlier removal.
T end(T... args)
void removeMeasOutliers(MeasuredStarList &outliers)
Remove measuredStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:277
void update(SparseMatrixD const &H, bool UpOrDown)
Definition: Eigenstuff.h:42
A list of MeasuredStar. They are usually filled in Associations::createCcdImage.
Definition: MeasuredStar.h:118
virtual void saveChi2Contributions(std::string const &baseName) const
Save the full chi2 term per star that was used in the minimization, for debugging.
Definition: FitterBase.cc:299
MinimizeResult
Return value of minimize()
Definition: FitterBase.h:17
#define LOGLS_INFO(logger, message)
def dot(symb, c, r, frame=None, size=2, ctype=None, origin=afwImage.PARENT, args, kwargs)
STL class.
MinimizeResult minimize(std::string const &whatToFit, double const nSigmaCut=0, bool const doRankUpdate=true, bool const doLineSearch=false, std::string const &dumpMatrixFile="")
Does a 1 step minimization, assuming a linear model.
Definition: FitterBase.cc:145
virtual void saveChi2MeasContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of measurement terms.
Structure to accumulate the chi2 contributions per each star (to help find outliers).
Definition: Chi2.h:77
T push_back(T... args)
STL class.
Class for a simple mapping implementing a generic Gtransfo.
A list of FittedStar s. Such a list is typically constructed by Associations.
Definition: FittedStar.h:99
void removeRefOutliers(FittedStarList &outliers)
Remove refStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:285
T isfinite(T... args)
objects measured on actual images.
Definition: MeasuredStar.h:19
T dynamic_pointer_cast(T... args)
T clear(T... args)
virtual void saveChi2RefContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of reference terms.
std::shared_ptr< Associations > _associations
Definition: FitterBase.h:136
virtual void assignIndices(std::string const &whatToFit)=0
Set parameters to fit and assign indices in the big matrix.
std::unique_ptr< SchemaItem< U > > result
T insert(T... args)
T size(T... args)
Chi2Statistic computeChi2() const
Returns the chi2 for the current state.
Definition: FitterBase.cc:18
T begin(T... args)
T pow(T... args)
#define LOGLS_DEBUG(logger, message)
void leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const
Evaluates the chI^2 derivatives (Jacobian and gradient) for the current whatToFit setting...
Definition: FitterBase.cc:291
T sort(T... args)
Handler of an actual image from a single CCD.
Definition: CcdImage.h:41
#define LOGL_WARN(logger, message...)
virtual void offsetParams(Eigen::VectorXd const &delta)=0
Offset the parameters by the requested quantities.
unsigned getNextFreeIndex() const
Definition: Tripletlist.h:22
void outliersContributions(MeasuredStarList &msOutliers, FittedStarList &fsOutliers, TripletList &tripletList, Eigen::VectorXd &grad)
Contributions to derivatives from (presumably) outlier terms.
Definition: FitterBase.cc:266
The objects which have been measured several times.
Definition: FittedStar.h:37
std::pair< double, double > computeAverageAndSigma()
Compute the average and std-deviation of these chisq values.
Definition: Chi2.cc:31
#define LOGLS_ERROR(logger, message)
#define LOGLS_WARN(logger, message)
T reserve(T... args)
virtual void leastSquareDerivativesMeasurement(CcdImage const &ccdImage, TripletList &tripletList, Eigen::VectorXd &grad, MeasuredStarList const *measuredStarList=nullptr) const =0
Compute the derivatives of the measured stars and model for one CcdImage.
T rbegin(T... args)