BCGraphFitter.cxx
1 /*
2  * Copyright (C) 2007-2018, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 #include "BCGraphFitter.h"
10 
11 #include <BAT/BCLog.h>
12 #include <BAT/BCMath.h>
13 
14 #include <TF1.h>
15 #include <TGraphErrors.h>
16 #include <TLegend.h>
17 #include <TMath.h>
18 #include <Math/ProbFuncMathCore.h>
19 #include <TPad.h>
20 #include <TString.h>
21 
22 // ---------------------------------------------------------
23 BCGraphFitter::BCGraphFitter(const TGraphErrors& graph, const TF1& func, const std::string& name)
24  : BCFitter(func, name),
25  fGraph(graph)
26 {
27  // why not just one point?
28  if (fGraph.GetN() <= 1) {
29  throw std::invalid_argument(std::string(__PRETTY_FUNCTION__) + ": TGraphErrors needs at least two points.");
30  }
31 
32  double* x = fGraph.GetX();
33  double* y = fGraph.GetY();
34  double* ex = fGraph.GetEX();
35  double* ey = fGraph.GetEY();
36 
37  if (!ey) {
38  throw std::invalid_argument(std::string(__PRETTY_FUNCTION__) + ": TGraphErrors has NO errors set on Y. Not able to fit.");
39  }
40 
41  // fill the dataset
42  for (int i = 0; i < fGraph.GetN(); ++i) {
43  // create the data point
45  GetDataSet()->Back()[0] = x[i];
46  GetDataSet()->Back()[1] = y[i];
47  GetDataSet()->Back()[2] = ex ? ex[i] : 0;
48  GetDataSet()->Back()[3] = ey[i];
49  }
50  // adjust bounds for 1 sigma of the uncertainties
51  GetDataSet()->AdjustBoundForUncertainties(0, 1, 2); // x +- 1*errx
52  GetDataSet()->AdjustBoundForUncertainties(1, 1, 3); // y +- 1*erry
53 
55 
56  // set MCMC for marginalization
58 }
59 
60 // ---------------------------------------------------------
62 
63 // ---------------------------------------------------------
64 double BCGraphFitter::LogLikelihood(const std::vector<double>& params)
65 {
66  TF1& f = GetFitFunction();
67 
68  // set the parameters of the function
69  // passing the pointer to first element of the vector is
70  // not completely safe as there might be an implementation where
71  // the vector elements are not stored consecutively in memory.
72  // however it is much faster than copying the contents, especially
73  // for large number of parameters
74  f.SetParameters(&params[0]);
75 
76  // initialize probability
77  double logl = 0.;
78 
79  // loop over all data points
80  for (unsigned i = 0; i < GetNDataPoints(); i++)
81  // calculate log of probability assuming
82  // a Gaussian distribution for each point
83  logl += BCMath::LogGaus(GetDataSet()->GetDataPoint(i)[1], // y value of point
84  f.Eval(GetDataSet()->GetDataPoint(i)[0]), // f(x value of point)
85  GetDataSet()->GetDataPoint(i)[3], // uncertainty on y value of point
86  true); // include normalization factor
87 
88  return logl;
89 }
90 
91 // ---------------------------------------------------------
93 {
94  // check setup
95  BCLog::OutDetail(Form("Fitting %d data points with function of %d parameters", GetNDataPoints(), GetNParameters()));
96  if (GetNDataPoints() <= GetNParameters()) {
97  BCLog::OutWarning(Form("Number of parameters (%d) lower than or equal to number of points (%d).", GetNParameters(), GetNDataPoints()));
98  BCLog::OutWarning("Fit doesn't have much meaning.");
99  }
100 
101  // perform marginalization
102  MarginalizeAll();
103 
104  // maximize posterior probability, using the best-fit values close
105  // to the global maximum from the MCMC
109  SetOptimizationMethod(method_temp);
110 
111  // p value with (approximate) correction for degrees of freedom
113 
114  // print summary to screen
116 }
117 
118 // ---------------------------------------------------------
119 void BCGraphFitter::DrawFit(const std::string& options, bool flaglegend)
120 {
121  // check wheather options contain "same"
122  TString opt = options;
123  opt.ToLower();
124 
125  // if not same, draw the histogram first to get the axes
126  if (!opt.Contains("same"))
127  fGraph.Draw("ap");
128 
129  // draw the error band as central 68% probability interval
130  TGraph* errorBand = GetErrorBandGraph(0.16, 0.84);
131  fObjectTrash.Put(errorBand);
132  errorBand->Draw("f same");
133 
134  // draw the fit function on top
135  TGraph* graphFitFunction = GetFitFunctionGraph();
136  fObjectTrash.Put(graphFitFunction);
137  graphFitFunction->SetLineColor(kRed);
138  graphFitFunction->SetLineWidth(2);
139  graphFitFunction->Draw("l same");
140 
141  // now draw the histogram again since it was covered by the band and the best fit
142  fGraph.Draw("p same");
143 
144  // draw legend
145  if (flaglegend) {
146  TLegend* legend = new TLegend(0.25, 0.75, 0.55, 0.9);
147  fObjectTrash.Put(legend);
148  legend->SetBorderSize(0);
149  legend->SetFillColor(kWhite);
150  legend->AddEntry(&fGraph, "Data", "PE");
151  legend->AddEntry(graphFitFunction, "Best fit", "L");
152  legend->AddEntry(errorBand, "Error band", "F");
153  legend->Draw();
154  }
155 
156  gPad->RedrawAxis();
157 }
158 
159 // ---------------------------------------------------------
160 double BCGraphFitter::CalculateChi2(const std::vector<double>& pars)
161 {
162  TF1& f = GetFitFunction();
163 
164  // set pars into fit function
165  f.SetParameters(&pars[0]);
166 
167  double chi, chi2 = 0;
168  for (unsigned i = 0; i < GetDataSet()->GetNDataPoints(); ++i) {
169  chi = (GetDataSet()->GetDataPoint(i)[1] - f.Eval(GetDataSet()->GetDataPoint(i)[0])) / GetDataSet()->GetDataPoint(i)[3];
170  chi2 += chi * chi;
171  }
172 
173  return chi2;
174 }
175 
176 // ---------------------------------------------------------
177 double BCGraphFitter::CalculatePValue(const std::vector<double>& pars, bool ndf)
178 {
179  const double chi2 = CalculateChi2(pars);
180 
181  if (chi2 < 0) {
182  BCLOG_ERROR("chi2 is negative.");
183  fPValue = -1;
184  }
185 
186  else if (ndf) {
187  if (GetNDoF() <= 0) {
188  BCLOG_ERROR("number of degrees of freedom is not positive.");
189  fPValue = -1;
190  }
191  fPValue = TMath::Prob(chi2, GetNDoF());
192 
193  } else if (GetNDataPoints() == 0) {
194  BCLog::OutError("BCGraphFitter::CalculatePValue : number of data points is zero.");
195  fPValue = -1;
196 
197  } else
198  fPValue = TMath::Prob(chi2, GetNDataPoints());
199 
200  return fPValue;
201 }
void PrintShortFitSummary()
Prints a short summary of the fit results on the screen.
Definition: BCFitter.cxx:212
int GetNDoF() const
Definition: BCModel.h:89
virtual double CalculatePValue(const std::vector< double > &pars, bool ndf=true)
Calculate p value from chi^2 distribution, with assumption of Gaussian distribution for all data poin...
void Fit()
Performs the fit.
void SetMarginalizationMethod(BCIntegrate::BCMarginalizationMethod method)
Definition: BCIntegrate.h:465
bool AddDataPoint(const BCDataPoint &datapoint)
Adds a data point to the data set.
Definition: BCDataSet.cxx:209
TF1 & GetFitFunction()
Get fit function.
Definition: BCFitter.h:55
A class representing a data point.
Definition: BCDataPoint.h:34
TGraph * GetErrorBandGraph(double level1, double level2) const
Definition: BCFitter.cxx:249
int MarginalizeAll()
Marginalize all probabilities wrt.
std::vector< double > FindMode(std::vector< double > start=std::vector< double >())
Do the mode finding using a method set via SetOptimizationMethod.
void SetFitFunctionIndices(int indexx, int indexy)
Sets indices of the x and y values in function fits.
Definition: BCFitter.h:187
unsigned GetNDataPoints() const
Definition: BCDataSet.h:75
void AdjustBoundForUncertainties(unsigned i, double nSigma, unsigned i_err1, int i_err2=-1)
Recalculate a data axis bound accounting for uncertainties specified by other data axes...
Definition: BCDataSet.cxx:232
BCDataPoint & Back()
Access to last added data point.
Definition: BCDataSet.h:99
BCOptimizationMethod
An enumerator for the mode finding algorithm.
Definition: BCIntegrate.h:153
BCAux::BCTrash< TObject > fObjectTrash
Storage for plot objects with proper clean-up.
BCDataPoint & GetDataPoint(unsigned index)
Safer, but slower, access to data points.
Definition: BCDataSet.h:87
TGraph * GetFitFunctionGraph()
Definition: BCFitter.h:91
double LogGaus(double x, double mean=0, double sigma=1, bool norm=false)
Calculate the natural logarithm of a normal distribution function.
Definition: BCMath.cxx:24
unsigned GetNDataPoints() const
Definition: BCModel.h:83
BCIntegrate::BCOptimizationMethod GetOptimizationMethod() const
Definition: BCIntegrate.h:264
virtual double LogLikelihood(const std::vector< double > &parameters)
The log of the conditional probability.
virtual ~BCGraphFitter()
The default destructor.
double fPValue
p value for goodness of fit
Definition: BCFitter.h:285
void DrawFit(const std::string &options="", bool flaglegend=false)
Draw the fit in the current pad.
BCGraphFitter(const TGraphErrors &graph, const TF1 &func, const std::string &name="graph_fitter_model")
Constructor.
A base class for all fitting classes.
Definition: BCFitter.h:31
Metropolis Hastings.
Definition: BCIntegrate.h:178
BCDataSet * GetDataSet()
Definition: BCModel.h:78
ROOT&#39;s Minuit.
Definition: BCIntegrate.h:157
unsigned GetNParameters() const
Definition: BCEngineMCMC.h:656
virtual const std::vector< double > & GetBestFitParameters() const
void SetOptimizationMethod(BCIntegrate::BCOptimizationMethod method)
Definition: BCIntegrate.h:456
virtual double CalculateChi2(const std::vector< double > &pars)
Calculate chi^2, the sum of [(y-f(x))/sigma_y]^2 for all data points.