BATCalculator.cxx
1 /*
2  * Copyright (C) 2007-2018, the BAT core developer team
3  * All rights reserved.
4  *
5  * Original authors: Gregory Schott and Stefan A. Schmitz with inspiration from
6  * Kyle Cranmer, Lorenzo Moneta, Gregory Schott, and Wouter Verkerke.
7  *
8  * For the licensing terms see doc/COPYING.
9  * For documentation see http://mpp.mpg.de/bat
10  */
11 
12 #include "BATCalculator.h"
13 #include "BCRooInterface.h"
14 
15 #include "../../BAT/BCLog.h"
16 #include "../../BAT/BCAux.h"
17 #include "../../BAT/BCH1D.h"
18 
19 #include <RooAbsFunc.h>
20 #include <RooRealVar.h>
21 #include <RooBrentRootFinder.h>
22 #include <RooFormulaVar.h>
23 #include <RooGenericPdf.h>
24 #include <RooProdPdf.h>
25 #include <RooDataHist.h>
26 #include <RooHistPdf.h>
27 #include <RooStats/MarkovChain.h>
28 
29 #include <TAxis.h>
30 #include <TFile.h>
31 #include <TCanvas.h>
32 #include <TVector.h>
33 
34 #include <algorithm>
35 #include <cassert>
36 
38 
39 namespace
40 {
41 //help function for calculating the shortest interval
42 bool sortbyposterior(std::pair< Int_t, Double_t > pair1, std::pair< Int_t, Double_t > pair2)
43 {
44  return pair1.second > pair2.second;
45 }
46 }
47 
48 namespace RooStats
49 {
50 
51 // ---------------------------------------------------------
53  fData(0),
54  fPdf(0),
55  fPrior(0),
56  fparams(0),
57  _posteriorTH1D(0),
58  fProductPdf(0),
59  fLogLike(0),
60  fLikelihood(0),
61  fIntegratedLikelihood(0),
62  fPosteriorPdf(0),
63  fLower(0),
64  fUpper(0),
65  fBrfPrecision(0.00005),
66  fValidInterval(false),
67  fValidMCMCInterval(false),
68  fConnectedInterval(false),
69  _nMCMC(0),
70  fSize(0.05),
71  fLeftSideFraction(0.5)
72 {
73  // default constructor
74  _myRooInterface = new BCRooInterface();
75 }
76 
77 // ---------------------------------------------------------
78 BATCalculator::BATCalculator( /* const char * name, const char * title, */
79  RooAbsData& data,
80  RooAbsPdf& pdf,
81  RooArgSet& POI,
82  RooAbsPdf& prior,
83  RooArgSet* params,
84  bool fillChain) :
85  fData(&data),
86  fPdf(&pdf),
87  fPOI(POI),
88  fPrior(&prior),
89  fparams(params),
90  _posteriorTH1D(0),
91  fProductPdf(0),
92  fLogLike(0),
93  fLikelihood(0),
94  fIntegratedLikelihood(0),
95  fPosteriorPdf(0),
96  fLower(0),
97  fUpper(0),
98  fBrfPrecision(0.00005),
99  fValidInterval(false),
100  fValidMCMCInterval(false),
101  fConnectedInterval(false),
102  _nMCMC(1000000),
103  fSize(0.05),
104  fLeftSideFraction(0.5)
105 {
106  _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT", fillChain);
107 }
108 
109 // ---------------------------------------------------------
110 BATCalculator::BATCalculator( RooAbsData& data, ModelConfig& model, bool fillChain) :
111  fData(&data),
112  fPdf(model.GetPdf()),
113  fPOI(*model.GetParametersOfInterest()),
114  fPrior(model.GetPriorPdf()),
115  fparams(model.GetNuisanceParameters()),
116  _posteriorTH1D(0),
117  fProductPdf(0),
118  fLogLike(0),
119  fLikelihood(0),
120  fIntegratedLikelihood(0),
121  fPosteriorPdf(0),
122  fLower(0),
123  fUpper(0),
124  fBrfPrecision(0.00005),
125  fValidInterval(false),
126  fValidMCMCInterval(false),
127  fConnectedInterval(false),
128  _nMCMC(1000000),
129  fSize(0.05),
130  fLeftSideFraction(0.5)
131 {
132  // constructor from Model Config
133  // SetModel(model);
134  _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT", fillChain);
135 }
136 
137 // ---------------------------------------------------------
139 {
140  // destructor
141  ClearAll();
142  delete _myRooInterface;
143 }
144 
145 // ---------------------------------------------------------
146 void BATCalculator::ClearAll() const
147 {
148  // clear cached pdf objects
149  if (fProductPdf)
150  delete fProductPdf;
151  if (fLogLike)
152  delete fLogLike;
153  if (fLikelihood)
154  delete fLikelihood;
155  if (fIntegratedLikelihood)
156  delete fIntegratedLikelihood;
157  if (fPosteriorPdf)
158  delete fPosteriorPdf;
159  fPosteriorPdf = 0;
160  fProductPdf = 0;
161  fLogLike = 0;
162  fLikelihood = 0;
163  fIntegratedLikelihood = 0;
164  fLower = 0;
165  fUpper = 0;
166  fValidInterval = false;
167  fValidMCMCInterval = false;
168 }
169 
170 // ---------------------------------------------------------
171 //returns posterior with respect to first entry in the POI ArgSet
173 {
174  const char* POIname = fPOI.first()->GetName();
175  return GetPosteriorPdf1D(POIname);
176 }
177 
178 // ---------------------------------------------------------
179 // returns posterior with respect to provided name in the POI ArgSet
180 RooAbsPdf* BATCalculator::GetPosteriorPdf1D(const char* POIname) const
181 {
182  // run some sanity checks
183  if (!fPdf) {
184  std::cerr << "BATCalculator::GetPosteriorPdf - missing pdf model" << std::endl;
185  return 0;
186  }
187 
188  if (!fPrior) {
189  std::cerr << "BATCalculator::GetPosteriorPdf - missing prior pdf" << std::endl;
190  }
191 
192  if (fPOI.getSize() == 0) {
193  std::cerr << "BATCalculator::GetPosteriorPdf - missing parameter of interest" << std::endl;
194  return 0;
195  }
196 
197  if (fPOI.getSize() > 1) {
198  std::cerr << "BATCalculator::GetPosteriorPdf - current implementation works only on 1D intervals" << std::endl;
199  return 0;
200  }
201 
202  //initialize RooInterface object
203  _myRooInterface->Initialize(*fData, *fPdf, *fPrior, fparams, fPOI);
204  _myRooInterface->SetNIterationsRun(_nMCMC);
205  _myRooInterface->MarginalizeAll();
206  _myRooInterface->FindMode();
207  BCH1D myPosterior = _myRooInterface->GetMarginalized(POIname);
208  TH1* posteriorTH1D = myPosterior.GetHistogram();
209  // Is this a memory leak? A guard led to segfault, see https://github.com/bat/bat/pull/301#issuecomment-390121730
210  _posteriorTH1D = static_cast<TH1D*>(BCAux::OwnClone(posteriorTH1D, "_posteriorTH1D"));
211  RooDataHist* posteriorRooDataHist = new RooDataHist("posteriorhist", "", fPOI, posteriorTH1D);
212  fPosteriorPdf = new RooHistPdf("posteriorPdf", "", fPOI, *posteriorRooDataHist);
213 
214  return fPosteriorPdf; // is of type RooAbsPdf*
215 }
216 
217 // ---------------------------------------------------------
218 // return a RooPlot with the posterior PDF and the credibility region
219 RooPlot* BATCalculator::GetPosteriorPlot1D() const
220 {
221  if (!fPosteriorPdf) {
222  std::cout << "BATCalculator::GetPosteriorPlot1D:"
223  << "Warning : posterior not available" << std::endl;
225  }
226  if ((!fValidInterval) && (!fValidMCMCInterval)) {
227  std::cout << "BATCalculator::GetPosteriorPlot1D:"
228  << "Warning : interval not available" << std::endl;
229  GetInterval1D();
230  }
231 
232  RooAbsRealLValue* poi = dynamic_cast<RooAbsRealLValue*>( fPOI.first() );
233  assert(poi);
234 
235  RooPlot* plot = poi->frame();
236 
237  plot->SetTitle(TString("Posterior probability of parameter \"") + TString(poi->GetName()) + TString("\""));
238  fPosteriorPdf->plotOn(plot, RooFit::Range(fLower, fUpper, kFALSE), RooFit::VLines(), RooFit::DrawOption("F"), RooFit::MoveToBack(), RooFit::FillColor(kGray));
239  fPosteriorPdf->plotOn(plot);
240  plot->GetYaxis()->SetTitle("posterior probability");
241 
242  return plot;
243 }
244 
245 // ---------------------------------------------------------
246 // returns central interval for first POI in the POI ArgSet
247 SimpleInterval* BATCalculator::GetInterval1D() const
248 {
249  const char* POIname = fPOI.first()->GetName();
250  return GetInterval1D(POIname); //is of type RooAbsPdf *
251 }
252 
253 
254 // ---------------------------------------------------------
255 // returns central interval for the defined POI in the POI ArgSet->test code because orginal version is not working anymore
256 SimpleInterval* BATCalculator::GetInterval1D(const char* POIname) const
257 {
258  //const char * POIname = fPOI.first()->GetName();
259 
260  if (fValidInterval)
261  std::cout << "BATCalculator::GetShortestInterval1D:"
262  << "Warning : recomputing interval for the same CL and same model" << std::endl;
263 
264  // get pointer to selected parameter of interest
265  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.find(POIname) );
266  assert(poi);
267 
268  // get pointer to poserior pdf
269  if (!fPosteriorPdf)
270  fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
271  //RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
272 
273  // range of poi
274  Double_t minpoi = poi->getMin();
275  Double_t maxpoi = poi->getMax();
276 
277  // bin number of histogram for posterior
278  Int_t stepnumber = _posteriorTH1D->GetNbinsX();
279 
280  // width of one bin in histogram of posterior
281  Double_t stepsize = (maxpoi - minpoi) / stepnumber;
282 
283  // pair: first entry: bin number , second entry: value of posterior
284  std::vector< std::pair< Int_t, Double_t > > posteriorVector;
285 
286  // for normalization
287  Double_t histoIntegral = 0;
288  // give posteriorVector the right length
289  posteriorVector.resize(stepnumber);
290 
291  // initialize elements of posteriorVector
292  int i = 0;
293  std::vector< std::pair< Int_t, Double_t > >::iterator vecit = posteriorVector.begin();
294  std::vector< std::pair< Int_t, Double_t > >::iterator vecit_end = posteriorVector.end();
295  for ( ; vecit != vecit_end ; ++vecit) {
296  poi->setVal(poi->getMin() + i * stepsize);
297  posteriorVector[i] = std::make_pair(i, _posteriorTH1D->GetBinContent(i + 1) ); // hope this is working, +1 necessary, because GetBinContent(0) returns the underflow bin
298  histoIntegral += _posteriorTH1D->GetBinContent(i); // better to get this directly from the histogram ?!
299  ++i;
300  }
301 
302  double lowerProbLim = (1. - ConfidenceLevel()) * fLeftSideFraction;
303  double upperProbLim = 1. - ( (1. - ConfidenceLevel()) * (1. - fLeftSideFraction) );
304 
305  fLower = -1.;
306  fUpper = -1.;
307  bool lowerlimset = false;
308  bool upperlimset = false;
309 
310 
311  if (fLeftSideFraction == 0) {
312  fLower = minpoi;
313  lowerlimset = true;
314  }
315 
316  if (fLeftSideFraction == 1) {
317  fUpper = maxpoi;
318  upperlimset = true;
319  }
320 
321  // keep track of integrated posterior in the interval
322  Double_t integratedposterior = 0.;
323 
324  i = 0;
325  vecit = posteriorVector.begin();
326  for ( ; vecit != vecit_end ; ++vecit) {
327  integratedposterior += posteriorVector[i].second;
328  if ( (lowerlimset != true) && ((integratedposterior / histoIntegral) >= lowerProbLim) ) {
329  fLower = poi->getMin() + i * stepsize;
330  lowerlimset = true;
331  }
332  if ( (upperlimset != true) && ((integratedposterior / histoIntegral) >= upperProbLim) ) {
333  fUpper = poi->getMin() + i * stepsize;
334  upperlimset = true;
335  break;
336  }
337  i++;
338  }
339 
340  TString interval_name = TString("CentralBayesianInterval_a") + TString(this->GetName());
341  SimpleInterval* interval = new SimpleInterval(interval_name, *poi, fLower, fUpper, ConfidenceLevel());
342  interval->SetTitle("SimpleInterval from BATCalculator");
343 
344  return interval;
345 
346 }
347 
348 // ---------------------------------------------------------
350 {
351  const char* POIname = fPOI.first()->GetName();
352  bool checkConnected = true;
353  return GetShortestInterval1D(POIname, checkConnected);
354 }
355 
356 // ---------------------------------------------------------
357 // returns a SimpleInterval with lower and upper bounds on the
358 // parameter of interest. Applies the shortest interval rule to
359 // compute the credibility interval. The resulting interval is not necessarily connected.
360 // Methods for constructing the shortest region with respect given CL are now available in
361 // different places (here; MCMCCalculator, BAT)-> this might require cleaning at some point.
362 // Result is approximate as CL is not reached exactly (due to finite bin number/bin size in posterior histogram)-> make CL/interval a bit smaller or bigger ?
363 SimpleInterval* BATCalculator::GetShortestInterval1D(const char* POIname, bool& checkConnected) const
364 {
365 
366  if (fValidInterval)
367  std::cout << "BATCalculator::GetShortestInterval1D:"
368  << "Warning : recomputing interval for the same CL and same model" << std::endl;
369 
370  // get pointer to selected parameter of interest
371  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.find(POIname) );
372  assert(poi);
373 
374  // get pointer to poserior pdf
375  if (!fPosteriorPdf)
376  fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
377 
378  // range of poi
379  Double_t minpoi = poi->getMin();
380  Double_t maxpoi = poi->getMax();
381 
382  // bin number of histogram for posterior
383  Int_t stepnumber = _posteriorTH1D->GetNbinsX();
384 
385  // width of one bin in histogram of posterior
386  Double_t stepsize = (maxpoi - minpoi) / stepnumber;
387 
388  // pair: first entry: bin number , second entry: value of posterior
389  std::vector< std::pair< Int_t, Double_t > > posteriorVector;
390 
391  // for normalization
392  Double_t histoIntegral = 0;
393  // give posteriorVector the right length
394  posteriorVector.resize(stepnumber);
395 
396  // see in BayesianCalculator for details about this "feature"
397  Double_t tmpVal = poi->getVal();
398 
399  // initialize elements of posteriorVector
400  int i = 0;
401  std::vector< std::pair< Int_t, Double_t > >::iterator vecit = posteriorVector.begin();
402  std::vector< std::pair< Int_t, Double_t > >::iterator vecit_end = posteriorVector.end();
403  for ( ; vecit != vecit_end ; ++vecit) {
404  poi->setVal(poi->getMin() + i * stepsize);
405  posteriorVector[i] = std::make_pair(i, _posteriorTH1D->GetBinContent(i + 1) ); // hope this is working, +1 necessary, because GetBinContent(0) returns the underflow bin
406  histoIntegral += _posteriorTH1D->GetBinContent(i); // better to get this directly from the histogram ?!
407  i++;
408  }
409 
410  // sort posteriorVector with respect to posterior pdf
411  std::sort(posteriorVector.begin(), posteriorVector.end(), ::sortbyposterior);
412 
413  // keep track of integrated posterior in the interval
414  Double_t integratedposterior = 0.;
415 
416  // keep track of lowerLim and upperLim
417  Double_t lowerLim = posteriorVector.size();
418  Double_t upperLim = 0;
419 
420  // store the bins in the intervall
421  std::vector<bool> inInterval;
422  inInterval.resize(posteriorVector.size());
423 
424  // set all values in inInterval to false
425  for (unsigned int k = 0; k < inInterval.size(); k++)
426  inInterval[k] = false;
427 
428  unsigned int j = 0;
429  // add bins to interval while CL not reached
430  while (((integratedposterior / histoIntegral) < (1 - fSize)) && (j < posteriorVector.size())) {
431  integratedposterior += posteriorVector[j].second;
432 
433  // update vector with bins included in the interval
434  inInterval[posteriorVector[j].first] = true;
435 
436  if (posteriorVector[j].first < lowerLim) {
437  lowerLim = posteriorVector[j].first; // update lowerLim
438  }
439  if (posteriorVector[j].first > upperLim) {
440  upperLim = posteriorVector[j].first; // update upperLim
441  }
442 
443  fLower = lowerLim * stepsize; //
444  fUpper = upperLim * stepsize;
445  j++;
446  }
447 
448  // initialize vector with interval borders
449 
450  bool runInside = false;
451  for (unsigned int l = 0; l < inInterval.size(); l++) {
452  if ( (runInside == false) && (inInterval[l] == true) ) {
453  _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
454  runInside = true;
455  }
456  if ( ( runInside == true) && (l < (inInterval.size() - 1) ) && (inInterval[l + 1] == false) ) {
457  _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
458  runInside = false;
459  }
460  if ( ( runInside == true) && (l == (inInterval.size() - 1)) ) {
461  _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
462  }
463  }
464 
465  // check if the intervall is connected
466  if (checkConnected) {
467  if (_intervalBorders1D.size() > 2) {
468  fConnectedInterval = false;
469  } else {
470  fConnectedInterval = true;
471  }
472  }
473 
474  poi->setVal(tmpVal); // patch: restore the original value of poi
475 
476  fValidInterval = true;
477 
478  TString interval_name = TString("ShortestBayesianInterval_a") + TString(this->GetName());
479  SimpleInterval* interval = new SimpleInterval(interval_name, *poi, fLower, fUpper, ConfidenceLevel());
480  interval->SetTitle("Shortest SimpleInterval from BATCalculator");
481 
482  //if(assert)
483  return interval;
484 }
485 
486 // ---------------------------------------------------------
487 MCMCInterval* BATCalculator::GetInterval() const
488 {
489 
490  // run some sanity checks
491  if (!fPdf ) {
492  std::cerr << "BATCalculator::GetInterval - missing pdf model" << std::endl;
493  return 0;
494  }
495 
496  if (!fPrior) {
497  std::cerr << "BATCalculator::GetInterval - missing prior pdf" << std::endl;
498  }
499 
500  if (fPOI.getSize() == 0) {
501  std::cerr << "BATCalculator::GetInterval - missing parameter of interest" << std::endl;
502  return 0;
503  }
504 
505  if (!fPosteriorPdf) {
506  //initialize RooInterface object
507  _myRooInterface->Initialize(*fData, *fPdf, *fPrior, fparams, fPOI);
508  _myRooInterface->SetNIterationsRun(_nMCMC);
509  _myRooInterface->MarginalizeAll();
510  _myRooInterface->FindMode();
511  }
512 
513  MarkovChain* roostats_chain = GetBCRooInterface()->GetRooStatsMarkovChain();
514  MCMCInterval* mcmcInterval = new MCMCInterval("roostatsmcmcinterval", fPOI , *roostats_chain);
515  mcmcInterval->SetUseKeys(false);
516  mcmcInterval->SetConfidenceLevel(1. - fSize);
517  fValidMCMCInterval = true;
518  return mcmcInterval;
519 }
520 
521 // ---------------------------------------------------------
522 void BATCalculator::SetNumBins(const char* parname, int nbins)
523 {
524  _myRooInterface->SetNumBins(parname, nbins);
525 }
526 
527 // ---------------------------------------------------------
529 {
530  _myRooInterface->SetNumBins(nbins);
531 }
532 
533 // ---------------------------------------------------------
534 void BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction )
535 {
536  if ( (leftSideFraction >= 0.) && (leftSideFraction <= 1.) ) {
537  fLeftSideFraction = leftSideFraction;
538  } else {
539  std::cout << "BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction ) - value needs to be in the interval [0.,1.] to be meaningful, your value: " << leftSideFraction << " ,left side tail fraction has not been changed!" << std::endl;
540  }
541 
542 }
543 
544 // ---------------------------------------------------------
546 {
547  return GetInterval1D()->UpperLimit();
548 }
549 
550 } // end namespace RooStats
BATCalculator()
constructor
T * OwnClone(const T *o)
Create a clone of the input but avoid registering the object with ROOT so it cannot be deleted twice...
Definition: BCAux.h:189
Interface allowing to run BAT on a problem/data defined in a standard RooFit workspace format...
virtual ~BATCalculator()
destructor
int MarginalizeAll()
Marginalize all probabilities wrt.
Double_t GetOneSidedUperLim()
temporary solution ?
void SetNumBins(const char *parname, int nbins)
set the number of histogram bins for a specific parameter
std::vector< double > FindMode(std::vector< double > start=std::vector< double >())
Do the mode finding using a method set via SetOptimizationMethod.
void SetNIterationsRun(unsigned n)
Sets the number of iterations.
Definition: BCEngineMCMC.h:789
RooAbsPdf * GetPosteriorPdf1D() const
return posterior pdf
SimpleInterval * GetShortestInterval1D() const
return SimpleInterval object: shortest interval (not necessarily connected, one poi only) ...
Adapter to call BAT from within RooStats to compute credibility intervals.
Definition: BATCalculator.h:39
virtual SimpleInterval * GetInterval1D() const
return SimpleInterval object: central interval (one poi only)
virtual MCMCInterval * GetInterval() const
returns MCMCInterval object
A class for handling 1D distributions.
Definition: BCH1D.h:34
void SetLeftSideTailFraction(Double_t leftSideFraction)
set left side tail fraction (only for 1D interval, not meaningful for shortest interval) ...
BCH1D GetMarginalized(const std::string &name) const
Obtain the individual marginalized distributions with respect to one parameter.
Definition: BCEngineMCMC.h:561
RooStats::MarkovChain * GetRooStatsMarkovChain()
return the RooStats Markov Chain (empty if corresponding constructor option not set) ...
virtual Double_t ConfidenceLevel() const
Get the Confidence level for the test.
void SetNumBins(const char *parname, int nbins)
set the number of histogram bins for a specific parameter