BCHistogramFitter.cxx
1 /*
2  * Copyright (C) 2007-2018, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include <config.h>
12 
13 #include "BCHistogramFitter.h"
14 
15 #include <BAT/BCDataPoint.h>
16 #include <BAT/BCLog.h>
17 #include <BAT/BCMath.h>
18 
19 #include <TF1.h>
20 #include <TGraph.h>
21 #include <TH1D.h>
22 #include <TLegend.h>
23 #include <TMath.h>
24 #include <Math/ProbFuncMathCore.h> //for ROOT::Math namespace
25 #include <TPad.h>
26 #include <TRandom3.h>
27 #include <TString.h>
28 
29 // ---------------------------------------------------------
30 BCHistogramFitter::BCHistogramFitter(const TH1& hist, const TF1& func, const std::string& name) :
31  BCFitter(func, name)
32 {
33  if (hist.GetDimension() != 1)
34  throw std::invalid_argument("Only 1D histograms supported");
35 
36  hist.Copy(fHistogram);
37 
38  // create data points and add them to the data set.
39  // the x value is the lower edge of the bin, and
40  // the y value is the bin count
41  int nbins = hist.GetNbinsX();
42  for (int i = 1; i <= nbins; ++i) {
44  GetDataSet()->Back()[0] = hist.GetBinLowEdge(i);
45  GetDataSet()->Back()[1] = hist.GetBinContent(i);
46  }
47 
48  // set the data boundaries for x and y values
49  GetDataSet()->SetBounds(0, hist.GetXaxis()->GetXmin(), hist.GetXaxis()->GetXmax());
50  GetDataSet()->SetBounds(1, std::max<double>(hist.GetMinimum() - sqrt(hist.GetMinimum()) / 2, 0),
51  hist.GetMaximum() + sqrt(hist.GetMaximum()) / 2);
52 
53  // set the indices for fitting
55 
56  SetNIterationsRun(2000);
57  SetFillErrorBand(true);
58  fFlagIntegration = true;
59 
60  // set MCMC for marginalization
62 }
63 
64 // ---------------------------------------------------------
65 double BCHistogramFitter::LogLikelihood(const std::vector<double>& params)
66 {
67  // initialize probability
68  double loglikelihood = 0;
69 
70  // loop over all bins
71  for (int ibin = 1; ibin <= fHistogram.GetNbinsX(); ++ibin) {
72  // get bin edges and integrate for expectation
73  const double xmin = fHistogram.GetXaxis()->GetBinLowEdge(ibin);
74  const double xmax = fHistogram.GetXaxis()->GetBinUpEdge(ibin);
75  double yexp = Integral(params, xmin, xmax);
76 
77  // get the number of observed events
78  double y = fHistogram.GetBinContent(ibin);
79 
80  // get the value of the Poisson distribution
81  loglikelihood += BCMath::LogPoisson(y, yexp);
82  }
83 
84  return loglikelihood;
85 }
86 
87 // ---------------------------------------------------------
89 {
90  // perform marginalization
92 
93  // maximize posterior probability, using the best-fit values close
94  // to the global maximum from the MCMC
98  SetOptimizationMethod(method_temp);
99 
100  // calculate the p-value using the fast MCMC algorithm
102  BCLOG_WARNING("Could not use the fast p-value evaluation.");
103 
104  // print summary to screen
106 }
107 
108 // ---------------------------------------------------------
109 void BCHistogramFitter::DrawFit(const std::string& options, bool flaglegend)
110 {
111  if (GetBestFitParameters().empty()) {
112  BCLOG_ERROR("Fit not performed yet.");
113  return;
114  }
115 
116  // check whether options contain "same"
117  TString opt = options;
118  opt.ToLower();
119 
120  // if not same, draw the histogram first to get the axes
121  if (!opt.Contains("same"))
122  fHistogram.Draw(Form("hist%s", opt.Data()));
123 
124  // draw the error band as central 68% probability interval
125  TGraph* errorBand = GetErrorBandGraph(0.16, 0.84);
126  fObjectTrash.Put(errorBand);
127  errorBand->Draw("f same");
128 
129  // now draw the histogram again since it was covered by the band
130  fHistogram.Draw(Form("hist%ssame", opt.Data()));
131 
132  // draw the fit function on top
133  TGraph* graphFitFunction = GetFitFunctionGraph();
134  fObjectTrash.Put(graphFitFunction);
135  graphFitFunction->SetLineColor(kRed);
136  graphFitFunction->SetLineWidth(2);
137  graphFitFunction->Draw("l same");
138 
139  // draw legend
140  if (flaglegend) {
141  TLegend* legend = new TLegend(0.25, 0.75, 0.55, 0.9);
142  fObjectTrash.Put(legend);
143  legend->SetBorderSize(0);
144  legend->SetFillColor(kWhite);
145  legend->AddEntry(&fHistogram, "Data", "L");
146  legend->AddEntry(graphFitFunction, "Best fit", "L");
147  legend->AddEntry(errorBand, "Error band", "F");
148  legend->Draw();
149  }
150 
151  gPad->RedrawAxis();
152 }
153 
154 // ---------------------------------------------------------
155 double BCHistogramFitter::CalculatePValueFast(const std::vector<double>& pars, unsigned nIterations)
156 {
157  // check size of parameter vector
158  if (pars.size() != GetNParameters()) {
159  BCLOG_ERROR("Number of parameters is inconsistent.");
160  return false;
161  }
162 
163  // copy observed and expected values
164  std::vector<unsigned> observed(fHistogram.GetNbinsX());
165  std::vector<double> expected(observed.size());
166 
167  for (int ibin = 0 ; ibin < fHistogram.GetNbinsX(); ++ibin) {
168  observed[ibin] = (unsigned int) fHistogram.GetBinContent(ibin + 1);
169  expected[ibin] = Integral(pars, fHistogram.GetBinLowEdge(ibin + 1), fHistogram.GetBinLowEdge(ibin + 2));
170  }
171 
172  fPValue = BCMath::FastPValue(observed, expected, nIterations, fRandom.GetSeed());
173  return fPValue;
174 }
175 
176 // ---------------------------------------------------------
177 double BCHistogramFitter::CalculatePValueLikelihood(const std::vector<double>& pars)
178 {
179  // initialize test statistic -2*lambda
180  double logLambda = 0.0;
181 
182  for (int ibin = 1; ibin <= fHistogram.GetNbinsX(); ++ibin) {
183 
184  // get the number of observed events
185  const double y = fHistogram.GetBinContent(ibin);
186 
187  // get the number of expected events
188  const double yexp = Integral(pars, fHistogram.GetBinLowEdge(ibin), fHistogram.GetBinLowEdge(ibin + 1));
189 
190  // get the contribution from this datapoint
191  if (y == 0)
192  logLambda += yexp;
193  else
194  logLambda += yexp - y + y * log(y / yexp);
195  }
196 
197  // rescale
198  logLambda *= 2.0;
199 
200  //p value from chi^2 distribution, returns zero if logLambda < 0
201  fPValue = TMath::Prob(logLambda, GetNDataPoints());
202  return fPValue;
203 }
204 
205 // ---------------------------------------------------------
206 double BCHistogramFitter::CalculatePValueLeastSquares(const std::vector<double>& pars, bool weightExpect)
207 {
208  // initialize test statistic chi^2
209  double chi2 = 0.0;
210 
211  for (int ibin = 1; ibin <= fHistogram.GetNbinsX(); ++ibin) {
212 
213  // get the number of observed events
214  double y = fHistogram.GetBinContent(ibin);
215 
216  // get the number of expected events
217  const double yexp = Integral(pars, fHistogram.GetBinLowEdge(ibin), fHistogram.GetBinLowEdge(ibin + 1));
218 
219  //convert 1/0.0 into 1 for weighted sum
220  double weight;
221  if (weightExpect)
222  weight = (yexp > 0) ? yexp : 1.0;
223  else
224  weight = (y > 0) ? y : 1.0;
225 
226  // get the contribution from this datapoint
227  chi2 += (y - yexp) * (y - yexp) / weight;
228  }
229 
230  // p value from chi^2 distribution
231  fPValue = TMath::Prob(chi2, GetNDataPoints());
232  return fPValue;
233 }
234 
235 // ---------------------------------------------------------
236 double BCHistogramFitter::GraphCorrection(unsigned ibin) const
237 {
238  return fHistogram.GetXaxis()->GetBinUpEdge(ibin) - fHistogram.GetXaxis()->GetBinLowEdge(ibin);
239 }
void PrintShortFitSummary()
Prints a short summary of the fit results on the screen.
Definition: BCFitter.cxx:212
double CalculatePValueLeastSquares(const std::vector< double > &par, bool weightExpect=true)
Calculate the p-value using approximate chi^2 distribution of squared difference for conventional wei...
virtual double LogLikelihood(const std::vector< double > &parameters)
The log of the conditional probability.
void SetBounds(unsigned index, double lower_bound, double upper_bound, bool fixed=false)
Set bounds for data values.
Definition: BCDataSet.cxx:267
void SetMarginalizationMethod(BCIntegrate::BCMarginalizationMethod method)
Definition: BCIntegrate.h:465
bool AddDataPoint(const BCDataPoint &datapoint)
Adds a data point to the data set.
Definition: BCDataSet.cxx:209
A class representing a data point.
Definition: BCDataPoint.h:34
TGraph * GetErrorBandGraph(double level1, double level2) const
Definition: BCFitter.cxx:249
double FastPValue(const std::vector< unsigned > &observed, const std::vector< double > &expected, unsigned nIterations=1e5, unsigned seed=0)
Calculate the p value using fast MCMC for a histogram and the likelihood as test statistic.
Definition: BCMath.cxx:310
int MarginalizeAll()
Marginalize all probabilities wrt.
void DrawFit(const std::string &options="HIST", bool flaglegend=false)
Draw the fit in the current pad.
std::vector< double > FindMode(std::vector< double > start=std::vector< double >())
Do the mode finding using a method set via SetOptimizationMethod.
BCHistogramFitter(const TH1 &hist, const TF1 &func, const std::string &name="histogram_fitter_model")
Constructor.
void SetFitFunctionIndices(int indexx, int indexy)
Sets indices of the x and y values in function fits.
Definition: BCFitter.h:187
void SetNIterationsRun(unsigned n)
Sets the number of iterations.
Definition: BCEngineMCMC.h:789
TH1D fHistogram
The histogram with observed number of counts.
BCDataPoint & Back()
Access to last added data point.
Definition: BCDataSet.h:99
BCOptimizationMethod
An enumerator for the mode finding algorithm.
Definition: BCIntegrate.h:153
BCAux::BCTrash< TObject > fObjectTrash
Storage for plot objects with proper clean-up.
TGraph * GetFitFunctionGraph()
Definition: BCFitter.h:91
double CalculatePValueLikelihood(const std::vector< double > &par)
Calculate the p-value using approximate chi^2 distribution of scaled likelihood.
virtual void Fit()
Performs the fit.
unsigned GetNDataPoints() const
Definition: BCModel.h:83
BCIntegrate::BCOptimizationMethod GetOptimizationMethod() const
Definition: BCIntegrate.h:264
double Integral(const std::vector< double > &parameters, double xmin, double xmax)
Compute the integral of the fit function between xmin and xmax.
Definition: BCFitter.cxx:196
TRandom3 fRandom
Random number generator.
double LogPoisson(double x, double lambda)
Calculate the natural logarithm of a poisson distribution.
Definition: BCMath.cxx:53
double fPValue
p value for goodness of fit
Definition: BCFitter.h:285
void SetFillErrorBand(bool flag=true)
Turn on or off the filling of the error band during the MCMC run.
Definition: BCFitter.h:154
A base class for all fitting classes.
Definition: BCFitter.h:31
Metropolis Hastings.
Definition: BCIntegrate.h:178
BCDataSet * GetDataSet()
Definition: BCModel.h:78
virtual double GraphCorrection(unsigned ibin) const
Take care of bin width when creating a graph from the fit function.
ROOT&#39;s Minuit.
Definition: BCIntegrate.h:157
unsigned GetNParameters() const
Definition: BCEngineMCMC.h:656
double CalculatePValueFast(const std::vector< double > &par, unsigned nIterations=100000)
Calculate the p-value using fast-MCMC and the likelihood as test statistic.
virtual const std::vector< double > & GetBestFitParameters() const
bool fFlagIntegration
Flag for using the ROOT TH1::Integral method (true), or linear interpolation (false) ...
Definition: BCFitter.h:297
void SetOptimizationMethod(BCIntegrate::BCOptimizationMethod method)
Definition: BCIntegrate.h:456