BCEfficiencyFitter.cxx
1 /*
2  * Copyright (C) 2007-2018, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 #include <config.h>
11 
12 #include "BCEfficiencyFitter.h"
13 
14 #include <BAT/BCDataPoint.h>
15 #include <BAT/BCH1D.h>
16 #include <BAT/BCLog.h>
17 #include <BAT/BCMath.h>
18 
19 #include <TF1.h>
20 #include <TGraphAsymmErrors.h>
21 #include <TH1D.h>
22 #include <TH2D.h>
23 #include <TLegend.h>
24 #include <TMarker.h>
25 #include <TPad.h>
26 #include <TRandom3.h>
27 #include <TString.h>
28 
29 // ---------------------------------------------------------
30 BCEfficiencyFitter::BCEfficiencyFitter(const TH1& trials, const TH1& successes, const TF1& func, const std::string& name)
31  : BCFitter(func, name),
32  fHistogramBinomial("hist_binomial", "", 1000, 0., 1.),
33  fDataPointType(kDataPointSmallestInterval)
34 {
35  // check compatibility of both histograms : number of bins
36  if (trials.GetNbinsX() != successes.GetNbinsX())
37  throw std::invalid_argument("BCEfficiencyFitter: Histograms do not have the same number of bins.");
38 
39  // check compatibility of both histograms : bin edges incl. right-most edge = left edge of overflow bin
40  for (int i = 1; i <= trials.GetNbinsX() + 1; ++i) {
41  if (trials.GetBinLowEdge(i) != successes.GetBinLowEdge(i)) {
42  BCLOG_WARNING("Histograms of trials and successes don't have same bin boundaries.");
43  break;
44  }
45  }
46 
47  // check that there aren't more successes than trials
48  for (int i = 1; i <= trials.GetNbinsX(); ++i) {
49  if (trials.GetBinContent(i) < successes.GetBinContent(i)) {
50  throw std::invalid_argument("Trials histogram has fewer entries than successes histogram.");
51  }
52  }
53 
54  trials.Copy(fTrials);
55  successes.Copy(fSuccesses);
56 
57  // create data points and add them to the data set.
58  for (int i = 0; i < fTrials.GetNbinsX(); ++i)
60 
61  // set the data boundaries for x and y values.
62  GetDataSet()->SetBounds(0, fTrials.GetXaxis()->GetXmin(), fTrials.GetXaxis()->GetXmax());
63  GetDataSet()->SetBounds(1, 0.0, 1.0);
64 
65  // set the indices for fitting.
67 
68  fFlagIntegration = false;
69 
70  // set MCMC for marginalization
72 }
73 
74 // ---------------------------------------------------------
76 
77 // ---------------------------------------------------------
78 double BCEfficiencyFitter::LogLikelihood(const std::vector<double>& params)
79 {
80  TF1& f = GetFitFunction();
81 
82  // initialize probability
83  double loglikelihood = 0;
84 
85  // set the parameters of the function
86  // passing the pointer to first element of the vector is
87  // not completely safe as there might be an implementation where
88  // the vector elements are not stored consecutively in memory.
89  // however it is much faster than copying the contents, especially
90  // for large number of parameters
91  f.SetParameters(&params[0]);
92 
93  // get the number of bins
94  int nbins = fTrials.GetNbinsX();
95 
96  // loop over all bins
97  for (int ibin = 1; ibin <= nbins; ++ibin) {
98  // get n
99  int n = int(fTrials.GetBinContent(ibin));
100 
101  // get k
102  int k = int(fSuccesses.GetBinContent(ibin));
103 
104  // get bin edges
105  const double xmin = fTrials.GetXaxis()->GetBinLowEdge(ibin);
106  const double xmax = fTrials.GetXaxis()->GetBinUpEdge(ibin);
107 
108  const double eff = Integral(params, xmin, xmax);
109 
110  // get the value of the Binomial distribution
111  loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
112  }
113 
114  return loglikelihood;
115 }
116 
117 // ---------------------------------------------------------
119 {
120  // perform marginalization
121  MarginalizeAll();
122 
123  // maximize posterior probability, using the best-fit values close
124  // to the global maximum from the MCMC
128  SetOptimizationMethod(method_temp);
129 
130  // calculate the p-value using the fast MCMC algorithm
132 
133  // print summary to screen
135 }
136 
137 // ---------------------------------------------------------
138 void BCEfficiencyFitter::DrawData(bool flaglegend)
139 {
141 
142  // create new histogram
143  TH2D* hist_axes = new TH2D("hist_axes",
144  (std::string(fTrials.GetTitle()) + ";" + fTrials.GetXaxis()->GetTitle() + ";" + fTrials.GetYaxis()->GetTitle()).data(),
145  100, fTrials.GetXaxis()->GetXmin(), fTrials.GetXaxis()->GetXmax(),
146  100, std::min(fTrials.GetMinimum(), fSuccesses.GetMinimum()), 1.1 * std::max(fTrials.GetMaximum(), fSuccesses.GetMaximum()));
147  hist_axes->SetStats(false);
148 
149  fObjectTrash.Put(hist_axes);
150  hist_axes->Draw();
151 
152  fTrials.SetLineWidth(1);
153  fTrials.SetLineColor(kGray + 3);
154  fTrials.SetFillColor(fTrials.GetLineColor());
155  fTrials.SetFillStyle(1001);
156 
157  fSuccesses.SetMarkerColor(kRed);
158  fSuccesses.SetMarkerStyle(8);
159  fSuccesses.SetMarkerSize(0.7);
160 
161  fTrials.Draw("same");
162  fSuccesses.Draw("Psame");
163 
164  // draw values when trial is nonzero but success is zero
165  TMarker M;
166  M.SetMarkerColor(fSuccesses.GetMarkerColor());
167  M.SetMarkerStyle(fSuccesses.GetMarkerStyle());
168  M.SetMarkerSize(fSuccesses.GetMarkerSize());
169  for (int i = 1; i <= fTrials.GetNbinsX(); ++i)
170  if (fTrials.GetBinContent(i) > 0 && fSuccesses.GetBinContent(i) == 0)
171  M.DrawMarker(fTrials.GetBinCenter(i), 0);
172 
173  // draw legend
174  if (flaglegend) {
175  TLegend* legend = new TLegend(0.135, 0.955, 0.85, 0.98);
176  fObjectTrash.Put(legend);
177  legend->SetBorderSize(0);
178  legend->SetFillColor(0);
179  legend->SetFillStyle(0);
180  legend->SetTextAlign(12);
181  legend->SetTextSize(0.03);
182  legend->SetNColumns(2);
183  legend->SetColumnSeparation(5e-2);
184 
185  legend->AddEntry(&fTrials, fTrials.GetTitle(), "F");
186  legend->AddEntry(&fSuccesses, fSuccesses.GetTitle(), "P");
187 
188  legend->Draw();
189  }
190 }
191 
192 // ---------------------------------------------------------
193 void BCEfficiencyFitter::DrawFit(const std::string& options, bool flaglegend)
194 {
195  // create efficiency graph
196  TGraphAsymmErrors* graphRatio = new TGraphAsymmErrors();
197  fObjectTrash.Put(graphRatio);
198  graphRatio->SetLineColor(kGray + 3);
199  graphRatio->SetMarkerColor(graphRatio->GetLineColor());
200  graphRatio->SetMarkerStyle(8);
201  graphRatio->SetMarkerSize(0.7);
202 
203  // set points
204  for (int i = 1; i <= fTrials.GetNbinsX(); ++i) {
205  // calculate central value and uncertainties
206  double xexp, xmin, xmax;
207  if (fTrials.GetBinContent(i) > 0) {
208  GetUncertainties( static_cast<int>(fTrials.GetBinContent(i)), static_cast<int>(fSuccesses.GetBinContent(i)), 0.68, xexp, xmin, xmax);
209  graphRatio->SetPoint(graphRatio->GetN(), fTrials.GetBinCenter(i), xexp);
210  graphRatio->SetPointError(graphRatio->GetN() - 1, 0, 0, xexp - xmin, xmax - xexp);
211  }
212  }
213 
214  // check wheather options contain "same"
215  TString opt = options;
216  opt.ToLower();
217 
218  // if not same, draw the histogram first to get the axes
219  if (!opt.Contains("same")) {
221 
222  // create new histogram
223  TH2D* hist_axes = new TH2D("hist_axes",
224  Form(";%s;ratio", fTrials.GetXaxis()->GetTitle()),
225  100, fTrials.GetXaxis()->GetXmin(), fTrials.GetXaxis()->GetXmax(),
226  100, 0., 1.05);
227 
228  fObjectTrash.Put(hist_axes);
229  hist_axes->SetStats(kFALSE);
230  hist_axes->Draw();
231 
232  graphRatio->Draw(Form("%sp", opt.Data()));
233  }
234 
235  // draw the error band as central 68% probability interval
236  TGraph* errorBand = GetErrorBandGraph(0.16, 0.84);
237  fObjectTrash.Put(errorBand);
238  errorBand->Draw("f same");
239 
240  // now draw the histogram again since it was covered by the band
241  graphRatio->Draw(Form("%ssamepX", opt.Data()));
242 
243  // draw the fit function on top
244  TGraph* graphFitFunction = GetFitFunctionGraph();
245  fObjectTrash.Put(graphFitFunction);
246  graphFitFunction->SetLineColor(kRed);
247  graphFitFunction->SetLineWidth(2);
248  graphFitFunction->Draw("l same");
249 
250  // draw legend
251  if (flaglegend) {
252  TLegend* legend = new TLegend(0.135, 0.955, 0.85, 0.98);
253  fObjectTrash.Put(legend);
254  legend->SetBorderSize(0);
255  legend->SetFillColor(0);
256  legend->SetFillStyle(0);
257  legend->SetTextAlign(12);
258  legend->SetTextSize(0.03);
259  legend->SetNColumns(3);
260  legend->SetColumnSeparation(5e-2);
261 
262  legend->AddEntry(graphRatio, "Data", "PE");
263  legend->AddEntry(graphFitFunction, "Best fit", "L");
264  legend->AddEntry(errorBand, "Error band", "F");
265 
266  legend->Draw();
267 
268  }
269  gPad->RedrawAxis();
270 }
271 
272 // ---------------------------------------------------------
273 double BCEfficiencyFitter::CalculatePValueFast(const std::vector<double>& par, unsigned nIterations)
274 {
275  //use NULL pointer for no callback
276  return CalculatePValueFast(par, NULL, nIterations);
277 }
278 
279 // ---------------------------------------------------------
280 double BCEfficiencyFitter::CalculatePValueFast(const std::vector<double>& pars, BCEfficiencyFitter::ToyDataInterface* callback, unsigned nIterations)
281 {
282  // define temporary variables
283  int nbins = fTrials.GetNbinsX();
284 
285  std::vector<unsigned> histogram(nbins, 0);
286  std::vector<double> expectation(nbins, 0);
287 
288  double logp = 0;
289  double logp_start = 0;
290  int counter_pvalue = 0;
291 
292  // define starting distribution
293  for (int ibin = 0; ibin < nbins; ++ibin) {
294  // get bin boundaries
295  const double xmin = fTrials.GetXaxis()->GetBinLowEdge(ibin + 1);
296  const double xmax = fTrials.GetXaxis()->GetBinUpEdge(ibin + 1);
297 
298  // get the number of expected events
299  double yexp = Integral(pars, xmin, xmax);
300 
301  // get n
302  int n = int(fTrials.GetBinContent(ibin));
303 
304  // get k
305  int k = int(fSuccesses.GetBinContent(ibin));
306 
307  histogram[ibin] = k;
308  expectation[ibin] = n * yexp;
309 
310  // calculate p;
311  logp += BCMath::LogApproxBinomial(n, k, yexp);
312  }
313  logp_start = logp;
314 
315  // loop over iterations
316  for (unsigned iiter = 0; iiter < nIterations; ++iiter) {
317  // loop over bins
318  for (int ibin = 0; ibin < nbins; ++ibin) {
319  // get n
320  int n = int(fTrials.GetBinContent(ibin));
321 
322  // get k
323  int k = histogram.at(ibin);
324 
325  // get expectation
326  double yexp = 0;
327  if (n > 0)
328  yexp = expectation.at(ibin) / n;
329 
330  // random step up or down in statistics for this bin
331  double ptest = fRandom.Rndm() - 0.5;
332 
333  // continue, if efficiency is at limit
334  if (!(yexp > 0. || yexp < 1.0))
335  continue;
336 
337  // increase statistics by 1
338  if (ptest > 0 && (k < n)) {
339  // calculate factor of probability
340  double r = (double(n) - double(k)) / (double(k) + 1.) * yexp / (1. - yexp);
341 
342  // walk, or don't (this is the Metropolis part)
343  if (fRandom.Rndm() < r) {
344  histogram[ibin] = k + 1;
345  logp += log(r);
346  }
347  }
348 
349  // decrease statistics by 1
350  else if (ptest <= 0 && (k > 0)) {
351  // calculate factor of probability
352  double r = double(k) / (double(n) - (double(k) - 1)) * (1 - yexp) / yexp;
353 
354  // walk, or don't (this is the Metropolis part)
355  if (fRandom.Rndm() < r) {
356  histogram[ibin] = k - 1;
357  logp += log(r);
358  }
359  }
360 
361  } // end of looping over bins
362 
363  //one new toy data set is created
364  //call user interface to calculate arbitrary statistic's distribution
365  if (callback)
366  (*callback)(expectation, histogram);
367 
368  // increase counter
369  if (logp < logp_start)
370  counter_pvalue++;
371 
372  }
373 
374  // pvalue estimated as simple fraction
375  fPValue = double(counter_pvalue) / double(nIterations);
376  return fPValue;
377 }
378 
379 // ---------------------------------------------------------
380 bool BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double& xexp, double& xmin, double& xmax)
381 {
382  if (n == 0) {
383  xexp = 0.;
384  xmin = 0.;
385  xmax = 0.;
386  return false;
387  }
388 
389  BCLog::OutDebug(Form("Calculating efficiency data-point of type %d for (n,k) = (%d,%d)", fDataPointType, n, k));
390 
391  // create a clean histogram with binomial distribution
392  fHistogramBinomial.Reset();
393 
394  // loop over bins and fill histogram
395  for (int i = 1; i <= fHistogramBinomial.GetNbinsX(); ++i)
396  fHistogramBinomial.SetBinContent(i, BCMath::ApproxBinomial(n, k, fHistogramBinomial.GetBinCenter(i)));
397  // normalize
398  fHistogramBinomial.Scale(1. / fHistogramBinomial.Integral());
399 
400  switch (fDataPointType) {
401 
402  case kDataPointRMS: {
403  xexp = fHistogramBinomial.GetMean();
404  double rms = fHistogramBinomial.GetRMS();
405  xmin = xexp - rms;
406  xmax = xexp + rms;
407  BCLog::OutDebug(Form(" - mean = %f , rms = %f)", xexp, rms));
408  break;
409  }
410 
412  xexp = (double)k / (double)n;
413  BCH1D fbh(&fHistogramBinomial);
415  if (SI.intervals.empty()) {
416  xmin = xmax = xexp = 0.;
417  } else {
418  xmin = SI.intervals.front().xmin;
419  xmax = SI.intervals.front().xmax;
420  }
421 
422  break;
423  }
424 
426  // calculate quantiles
427  int nprobSum = 3;
428  double q[3];
429  double probSum[3];
430  probSum[0] = (1. - p) / 2.;
431  probSum[1] = 0.5;
432  probSum[2] = (1. + p) / 2.;
433 
434  fHistogramBinomial.GetQuantiles(nprobSum, q, probSum);
435 
436  xmin = q[0];
437  xexp = q[1];
438  xmax = q[2];
439  break;
440  }
441 
442  default: {
443  BCLog::OutError("BCEfficiencyFitter::GetUncertainties - invalid DataPointType specified.");
444  xmin = xmax = xexp = 0.;
445  }
446  }
447  BCLog::OutDebug(Form(" - efficiency = %f , range (%f - %f)", xexp, xmin, xmax));
448 
449  return true;
450 }
void PrintShortFitSummary()
Prints a short summary of the fit results on the screen.
Definition: BCFitter.cxx:212
virtual ~BCEfficiencyFitter()
The default destructor.
void SetBounds(unsigned index, double lower_bound, double upper_bound, bool fixed=false)
Set bounds for data values.
Definition: BCDataSet.cxx:267
void SetMarginalizationMethod(BCIntegrate::BCMarginalizationMethod method)
Definition: BCIntegrate.h:465
bool AddDataPoint(const BCDataPoint &datapoint)
Adds a data point to the data set.
Definition: BCDataSet.cxx:209
virtual void DrawData(bool flaglegend=false)
Draw the data in the current pad.
TF1 & GetFitFunction()
Get fit function.
Definition: BCFitter.h:55
double LogApproxBinomial(unsigned n, unsigned k, double p)
Calculates natural logarithm of the Binomial probability using approximations for factorial calculati...
Definition: BCMath.cxx:79
A class representing a data point.
Definition: BCDataPoint.h:34
double CalculatePValueFast(const std::vector< double > &par, BCEfficiencyFitter::ToyDataInterface *callback, unsigned nIterations=100000)
Calculate the p-value using fast-MCMC.
TGraph * GetErrorBandGraph(double level1, double level2) const
Definition: BCFitter.cxx:249
A guard object to prevent ROOT from taking over ownership of TNamed objects.
Definition: BCAux.h:171
int MarginalizeAll()
Marginalize all probabilities wrt.
std::vector< double > FindMode(std::vector< double > start=std::vector< double >())
Do the mode finding using a method set via SetOptimizationMethod.
Draw mean and standard deviation.
Draw mean and central 68% interval.
void SetFitFunctionIndices(int indexx, int indexy)
Sets indices of the x and y values in function fits.
Definition: BCFitter.h:187
bool GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
Calculates the central value and the lower and upper limits for a given probability.
virtual void Fit()
Performs the fit.
BCOptimizationMethod
An enumerator for the mode finding algorithm.
Definition: BCIntegrate.h:153
BCAux::BCTrash< TObject > fObjectTrash
Storage for plot objects with proper clean-up.
virtual void DrawFit(const std::string &options="", bool flaglegend=false)
Draw the fit in the current pad.
TGraph * GetFitFunctionGraph()
Definition: BCFitter.h:91
double ApproxBinomial(unsigned n, unsigned k, double p)
Calculates Binomial probability using approximations for factorial calculations if calculation for nu...
Definition: BCMath.cxx:97
std::vector< BCH1D::BCH1DSmallestInterval > GetSmallestIntervals(std::vector< double > masses)
Definition: BCH1D.cxx:452
Draw mean and smallest 68% interval.
BCIntegrate::BCOptimizationMethod GetOptimizationMethod() const
Definition: BCIntegrate.h:264
BCDataSet fFitterDataSet
Needed for uncertainty propagation.
Definition: BCFitter.h:292
double Integral(const std::vector< double > &parameters, double xmin, double xmax)
Compute the integral of the fit function between xmin and xmax.
Definition: BCFitter.cxx:196
TRandom3 fRandom
Random number generator.
A class for handling 1D distributions.
Definition: BCH1D.h:34
double fPValue
p value for goodness of fit
Definition: BCFitter.h:285
Vector of intervals with information about total mass.
Definition: BCH1D.h:247
A base class for all fitting classes.
Definition: BCFitter.h:31
BCEfficiencyFitter(const TH1 &trials, const TH1 &successes, const TF1 &func, const std::string &name="efficiency_fitter_model")
Constructor.
Metropolis Hastings.
Definition: BCIntegrate.h:178
BCDataSet * GetDataSet()
Definition: BCModel.h:78
ROOT&#39;s Minuit.
Definition: BCIntegrate.h:157
virtual double LogLikelihood(const std::vector< double > &parameters)
The log of the prior probability.
virtual const std::vector< double > & GetBestFitParameters() const
bool fFlagIntegration
Flag for using the ROOT TH1::Integral method (true), or linear interpolation (false) ...
Definition: BCFitter.h:297
void SetOptimizationMethod(BCIntegrate::BCOptimizationMethod method)
Definition: BCIntegrate.h:456
Abstract class which doesn&#39;t do anything but offers the right interface to allow calculation the dist...