lsst.jointcal  16.0-18-gdf247dd+5
FitterBase.cc
Go to the documentation of this file.
1 #include <vector>
2 #include "Eigen/Core"
3 
4 #include <boost/math/tools/minima.hpp>
5 
6 #include "lsst/log/Log.h"
7 
8 #include "lsst/jointcal/Chi2.h"
14 
15 namespace lsst {
16 namespace jointcal {
17 
19  Chi2Statistic chi2;
20  accumulateStatImageList(_associations->getCcdImageList(), chi2);
22  // chi2.ndof contains the number of squares.
23  // So subtract the number of parameters.
24  chi2.ndof -= _nParTot;
25  return chi2;
26 }
27 
28 unsigned FitterBase::findOutliers(double nSigmaCut, MeasuredStarList &msOutliers,
29  FittedStarList &fsOutliers) const {
30  // collect chi2 contributions
31  Chi2List chi2List;
32  chi2List.reserve(_nMeasuredStars + _associations->refStarList.size());
33  // contributions from measurement terms:
34  accumulateStatImageList(_associations->ccdImageList, chi2List);
35  // and from reference terms
36  accumulateStatRefStars(chi2List);
37 
38  // compute some statistics
39  size_t nval = chi2List.size();
40  if (nval == 0) return 0;
41  sort(chi2List.begin(), chi2List.end());
42  double median = (nval & 1) ? chi2List[nval / 2].chi2
43  : 0.5 * (chi2List[nval / 2 - 1].chi2 + chi2List[nval / 2].chi2);
44  auto averageAndSigma = chi2List.computeAverageAndSigma();
45  LOGLS_DEBUG(_log, "findOutliers chi2 stat: mean/median/sigma " << averageAndSigma.first << '/' << median
46  << '/' << averageAndSigma.second);
47  double cut = averageAndSigma.first + nSigmaCut * averageAndSigma.second;
48  /* For each of the parameters, we will not remove more than 1
49  measurement that contributes to constraining it. Keep track using
50  of what we are touching using an integer vector. This is the
51  trick that Marc Betoule came up to for outlier removals in "star
52  flats" fits. */
53  Eigen::VectorXi affectedParams(_nParTot);
54  affectedParams.setZero();
55 
56  unsigned nOutliers = 0; // returned to the caller
57  // start from the strongest outliers.
58  for (auto chi2 = chi2List.rbegin(); chi2 != chi2List.rend(); ++chi2) {
59  if (chi2->chi2 < cut) break; // because the array is sorted.
60  std::vector<unsigned> indices;
61  /* now, we want to get the indices of the parameters this chi2
62  term depends on. We have to figure out which kind of term it
63  is; we use for that the type of the star attached to the Chi2Star. */
64  auto measuredStar = std::dynamic_pointer_cast<MeasuredStar>(chi2->star);
65  std::shared_ptr<FittedStar> fittedStar; // To add to fsOutliers if it is a reference outlier.
66  if (measuredStar == nullptr) {
67  // it is a reference outlier
68  fittedStar = std::dynamic_pointer_cast<FittedStar>(chi2->star);
69  if (fittedStar->getMeasurementCount() == 0) {
70  LOGLS_WARN(_log, "FittedStar with no measuredStars found as an outlier: " << *fittedStar);
71  continue;
72  }
73  // NOTE: Stars contribute twice to astrometry (x,y), but once to photometry (flux),
74  // NOTE: but we only need to mark one index here because both will be removed with that star.
75  indices.push_back(fittedStar->getIndexInMatrix());
76  LOGLS_TRACE(_log, "Removing refStar " << *(fittedStar->getRefStar()) << " chi2: " << chi2->chi2);
77  /* One might think it would be useful to account for PM
78  parameters here, but it is just useless */
79  } else {
80  // it is a measurement outlier
81  auto tempFittedStar = measuredStar->getFittedStar();
82  if (tempFittedStar->getMeasurementCount() == 1 && tempFittedStar->getRefStar() == nullptr) {
83  LOGLS_WARN(_log, "FittedStar with 1 measuredStar and no refStar found as an outlier: "
84  << *tempFittedStar);
85  continue;
86  }
87  getIndicesOfMeasuredStar(*measuredStar, indices);
88  LOGLS_TRACE(_log, "Removing measStar " << *measuredStar << " chi2: " << chi2->chi2);
89  }
90 
91  /* Find out if we already discarded a stronger outlier
92  constraining some parameter this one constrains as well. If
93  yes, we keep this one, because this stronger outlier could be
94  causing the large chi2 we have in hand. */
95  bool drop_it = true;
96  for (auto const &i : indices) {
97  if (affectedParams(i) != 0) {
98  drop_it = false;
99  }
100  }
101 
102  if (drop_it) // store the outlier in one of the lists:
103  {
104  if (measuredStar == nullptr) {
105  // reference term
106  fsOutliers.push_back(fittedStar);
107  } else {
108  // measurement term
109  msOutliers.push_back(measuredStar);
110  }
111  // mark the parameters as directly changed when we discard this chi2 term.
112  for (auto const &i : indices) {
113  affectedParams(i)++;
114  }
115  nOutliers++;
116  }
117  } // end loop on measurements/references
118  LOGLS_INFO(_log, "findOutliers: found " << msOutliers.size() << " meas outliers and " << fsOutliers.size()
119  << " ref outliers ");
120 
121  return nOutliers;
122 }
123 
124 namespace {
126 SparseMatrixD createHessian(int nParTot, TripletList const &tripletList) {
127  SparseMatrixD jacobian(nParTot, tripletList.getNextFreeIndex());
128  jacobian.setFromTriplets(tripletList.begin(), tripletList.end());
129  return jacobian * jacobian.transpose();
130 }
131 
133 void dumpMatrixAndGradient(SparseMatrixD const &matrix, Eigen::VectorXd const &grad,
134  std::string const &dumpFile, LOG_LOGGER _log) {
135  std::string ext = ".txt";
136  Eigen::MatrixXd matrixDense(matrix);
137  std::string dumpMatrixPath = dumpFile + "-mat" + ext;
138  std::ofstream matfile(dumpMatrixPath);
139  matfile << matrixDense << std::endl;
140  std::string dumpGradPath = dumpFile + "-grad" + ext;
141  std::ofstream gradfile(dumpGradPath);
142  gradfile << grad << std::endl;
143  LOGLS_INFO(_log, "Dumped Hessian, gradient to: '" << dumpMatrixPath << "', '" << dumpGradPath << "'");
144 }
145 } // namespace
146 
147 MinimizeResult FitterBase::minimize(std::string const &whatToFit, double nSigmaCut, bool doRankUpdate,
148  bool const doLineSearch, std::string const &dumpMatrixFile) {
149  assignIndices(whatToFit);
150 
152 
153  // TODO : write a guesser for the number of triplets
154  unsigned nTrip = (_lastNTrip) ? _lastNTrip : 1e6;
155  TripletList tripletList(nTrip);
156  Eigen::VectorXd grad(_nParTot);
157  grad.setZero();
158  double scale = 1.0;
159 
160  // Fill the triplets
161  leastSquareDerivatives(tripletList, grad);
162  _lastNTrip = tripletList.size();
163 
164  LOGLS_DEBUG(_log, "End of triplet filling, ntrip = " << tripletList.size());
165 
166  SparseMatrixD hessian = createHessian(_nParTot, tripletList);
167  tripletList.clear(); // we don't need it any more after we have the hessian.
168 
169  LOGLS_DEBUG(_log, "Starting factorization, hessian: dim="
170  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
171  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
172 
173  if (dumpMatrixFile != "") {
174  if (hessian.rows() * hessian.cols() > 2e8) {
175  LOGLS_WARN(_log, "Hessian matrix is too big to dump to file, with rows, columns: "
176  << hessian.rows() << ", " << hessian.cols());
177  } else {
178  dumpMatrixAndGradient(hessian, grad, dumpMatrixFile, _log);
179  }
180  }
181 
183  if (chol.info() != Eigen::Success) {
184  LOGLS_ERROR(_log, "minimize: factorization failed ");
185  return MinimizeResult::Failed;
186  }
187 
188  unsigned totalMeasOutliers = 0;
189  unsigned totalRefOutliers = 0;
190  double oldChi2 = computeChi2().chi2;
191 
192  while (true) {
193  Eigen::VectorXd delta = chol.solve(grad);
194  if (doLineSearch) {
195  scale = _lineSearch(delta);
196  }
197  offsetParams(scale * delta);
198  Chi2Statistic currentChi2(computeChi2());
199  LOGLS_DEBUG(_log, currentChi2);
200  if (!isfinite(currentChi2.chi2)) {
201  LOGL_ERROR(_log, "chi2 is not finite. Aborting outlier rejection.");
202  returnCode = MinimizeResult::NonFinite;
203  break;
204  }
205  if (currentChi2.chi2 > oldChi2 && totalMeasOutliers + totalRefOutliers != 0) {
206  LOGL_WARN(_log, "chi2 went up, skipping outlier rejection loop");
207  returnCode = MinimizeResult::Chi2Increased;
208  break;
209  }
210  oldChi2 = currentChi2.chi2;
211 
212  if (nSigmaCut == 0) break; // no rejection step to perform
213  MeasuredStarList msOutliers;
214  FittedStarList fsOutliers;
215  // keep nOutliers so we don't have to sum msOutliers.size()+fsOutliers.size() twice below.
216  int nOutliers = findOutliers(nSigmaCut, msOutliers, fsOutliers);
217  totalMeasOutliers += msOutliers.size();
218  totalRefOutliers += fsOutliers.size();
219  if (nOutliers == 0) break;
220  TripletList outlierTriplets(nOutliers);
221  grad.setZero(); // recycle the gradient
222  // compute the contributions of outliers to derivatives
223  outliersContributions(msOutliers, fsOutliers, outlierTriplets, grad);
224  // Remove significant outliers
225  removeMeasOutliers(msOutliers);
226  removeRefOutliers(fsOutliers);
227  if (doRankUpdate) {
228  // convert triplet list to eigen internal format
229  SparseMatrixD H(_nParTot, outlierTriplets.getNextFreeIndex());
230  H.setFromTriplets(outlierTriplets.begin(), outlierTriplets.end());
231  chol.update(H, false /* means downdate */);
232  // The contribution of outliers to the gradient is the opposite
233  // of the contribution of all other terms, because they add up to 0
234  grad *= -1;
235  } else {
236  // don't reuse tripletList because we want a new nextFreeIndex.
237  TripletList nextTripletList(_lastNTrip);
238  grad.setZero();
239  // Rebuild the matrix and gradient
240  leastSquareDerivatives(nextTripletList, grad);
241  _lastNTrip = nextTripletList.size();
242  LOGLS_DEBUG(_log, "Triplets recomputed, ntrip = " << nextTripletList.size());
243 
244  hessian = createHessian(_nParTot, nextTripletList);
245  nextTripletList.clear(); // we don't need it any more after we have the hessian.
246 
248  "Restarting factorization, hessian: dim="
249  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
250  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
251  chol.compute(hessian);
252  if (chol.info() != Eigen::Success) {
253  LOGLS_ERROR(_log, "minimize: factorization failed ");
254  return MinimizeResult::Failed;
255  }
256  }
257  }
258 
259  // only print the outlier summary if outlier rejection was turned on.
260  if (nSigmaCut != 0) {
261  LOGLS_INFO(_log, "Number of outliers (Measured + Reference = Total): "
262  << totalMeasOutliers << " + " << totalRefOutliers << " = "
263  << totalMeasOutliers + totalRefOutliers);
264  }
265  return returnCode;
266 }
267 
269  TripletList &tripletList, Eigen::VectorXd &grad) {
270  for (auto &outlier : msOutliers) {
271  MeasuredStarList tmp;
272  tmp.push_back(outlier);
273  const CcdImage &ccdImage = outlier->getCcdImage();
274  leastSquareDerivativesMeasurement(ccdImage, tripletList, grad, &tmp);
275  }
276  leastSquareDerivativesReference(fsOutliers, tripletList, grad);
277 }
278 
280  for (auto &measuredStar : outliers) {
281  auto fittedStar = measuredStar->getFittedStar();
282  measuredStar->setValid(false);
283  fittedStar->getMeasurementCount()--; // could be put in setValid
284  }
285 }
286 
288  for (auto &fittedStar : outliers) {
289  fittedStar->setRefStar(nullptr);
290  }
291 }
292 
293 void FitterBase::leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const {
294  auto ccdImageList = _associations->getCcdImageList();
295  for (auto const &ccdImage : ccdImageList) {
296  leastSquareDerivativesMeasurement(*ccdImage, tripletList, grad);
297  }
298  leastSquareDerivativesReference(_associations->fittedStarList, tripletList, grad);
299 }
300 
301 void FitterBase::saveChi2Contributions(std::string const &baseName) const {
302  /* cook-up 2 different file names by inserting something just before
303  the dot (if any), and within the actual file name. */
304  size_t dot = baseName.rfind('.');
305  size_t slash = baseName.rfind('/');
306  if (dot == std::string::npos || (slash != std::string::npos && dot < slash)) dot = baseName.size();
307  std::string measTuple(baseName);
308  measTuple.insert(dot, "-meas");
309  saveChi2MeasContributions(measTuple);
310  std::string refTuple(baseName);
311  refTuple.insert(dot, "-ref");
312  saveChi2RefContributions(refTuple);
313 }
314 
315 double FitterBase::_lineSearch(Eigen::VectorXd const &delta) {
316  auto func = [this, &delta](double scale) {
317  auto offset = scale * delta;
318  offsetParams(offset);
319  auto chi2 = computeChi2();
320  // reset the system to where it was before offsetting.
321  offsetParams(-offset);
322  return chi2.chi2;
323  };
324  // The maximum theoretical precision is half the number of bits in the mantissa (see boost docs).
325  auto bits = std::numeric_limits<double>::digits / 2;
326  auto result = boost::math::tools::brent_find_minima(func, -1.0, 2.0, bits);
327  LOGLS_DEBUG(_log, "Line search scale factor: " << result.first);
328  return result.first;
329 }
330 
331 } // namespace jointcal
332 } // namespace lsst
#define LOGLS_WARN(logger, message)
virtual void accumulateStatRefStars(Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for RefStars...
virtual void leastSquareDerivativesReference(FittedStarList const &fittedStarList, TripletList &tripletList, Eigen::VectorXd &grad) const =0
Compute the derivatives of the reference terms.
Simple structure to accumulate chi2 and ndof.
Definition: Chi2.h:29
Eigen::SparseMatrix< double > SparseMatrixD
Definition: Eigenstuff.h:12
virtual void accumulateStatImageList(CcdImageList const &ccdImageList, Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for measurements...
unsigned findOutliers(double nSigmaCut, MeasuredStarList &msOutliers, FittedStarList &fsOutliers) const
Find Measurements and references contributing more than a cut, computed as The outliers are NOT remo...
Definition: FitterBase.cc:28
T rend(T... args)
def scale(algorithm, min, max=None, frame=None)
T rfind(T... args)
T endl(T... args)
virtual void getIndicesOfMeasuredStar(MeasuredStar const &measuredStar, std::vector< unsigned > &indices) const =0
Set the indices of a measured star from the full matrix, for outlier removal.
T end(T... args)
#define LOGL_ERROR(logger, message...)
void removeMeasOutliers(MeasuredStarList &outliers)
Remove measuredStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:279
void update(SparseMatrixD const &H, bool UpOrDown)
Definition: Eigenstuff.h:42
A list of MeasuredStar. They are usually filled in Associations::createCcdImage.
Definition: MeasuredStar.h:118
virtual void saveChi2Contributions(std::string const &baseName) const
Save the full chi2 term per star that was used in the minimization, for debugging.
Definition: FitterBase.cc:301
MinimizeResult
Return value of minimize()
Definition: FitterBase.h:17
def dot(symb, c, r, frame=None, size=2, ctype=None, origin=afwImage.PARENT, args, kwargs)
STL class.
MinimizeResult minimize(std::string const &whatToFit, double const nSigmaCut=0, bool const doRankUpdate=true, bool const doLineSearch=false, std::string const &dumpMatrixFile="")
Does a 1 step minimization, assuming a linear model.
Definition: FitterBase.cc:147
virtual void saveChi2MeasContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of measurement terms.
Structure to accumulate the chi2 contributions per each star (to help find outliers).
Definition: Chi2.h:77
T push_back(T... args)
STL class.
#define LOGLS_DEBUG(logger, message)
Class for a simple mapping implementing a generic Gtransfo.
#define LOGLS_TRACE(logger, message)
A list of FittedStar s. Such a list is typically constructed by Associations.
Definition: FittedStar.h:99
void removeRefOutliers(FittedStarList &outliers)
Remove refStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:287
T isfinite(T... args)
objects measured on actual images.
Definition: MeasuredStar.h:19
T dynamic_pointer_cast(T... args)
#define LOGL_WARN(logger, message...)
T clear(T... args)
virtual void saveChi2RefContributions(std::string const &baseName) const =0
Save a CSV file containing residuals of reference terms.
std::shared_ptr< Associations > _associations
Definition: FitterBase.h:136
virtual void assignIndices(std::string const &whatToFit)=0
Set parameters to fit and assign indices in the big matrix.
std::unique_ptr< SchemaItem< U > > result
#define LOGLS_INFO(logger, message)
T insert(T... args)
T size(T... args)
Chi2Statistic computeChi2() const
Returns the chi2 for the current state.
Definition: FitterBase.cc:18
T begin(T... args)
T pow(T... args)
void leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const
Evaluates the chI^2 derivatives (Jacobian and gradient) for the current whatToFit setting...
Definition: FitterBase.cc:293
T sort(T... args)
Handler of an actual image from a single CCD.
Definition: CcdImage.h:41
virtual void offsetParams(Eigen::VectorXd const &delta)=0
Offset the parameters by the requested quantities.
#define LOGLS_ERROR(logger, message)
unsigned getNextFreeIndex() const
Definition: Tripletlist.h:22
void outliersContributions(MeasuredStarList &msOutliers, FittedStarList &fsOutliers, TripletList &tripletList, Eigen::VectorXd &grad)
Contributions to derivatives from (presumably) outlier terms.
Definition: FitterBase.cc:268
The objects which have been measured several times.
Definition: FittedStar.h:37
std::pair< double, double > computeAverageAndSigma()
Compute the average and std-deviation of these chisq values.
Definition: Chi2.cc:31
T reserve(T... args)
virtual void leastSquareDerivativesMeasurement(CcdImage const &ccdImage, TripletList &tripletList, Eigen::VectorXd &grad, MeasuredStarList const *measuredStarList=nullptr) const =0
Compute the derivatives of the measured stars and model for one CcdImage.
T rbegin(T... args)