11 #include "BCMTFAnalysisFacility.h" 14 #include "BCMTFChannel.h" 15 #include "BCMTFComparisonTool.h" 16 #include "BCMTFSystematic.h" 17 #include "BCMTFTemplate.h" 19 #include "../../BAT/BCLog.h" 20 #include "../../BAT/BCH1D.h" 21 #include "../../BAT/BCParameter.h" 37 void cd(
const std::string& dir)
40 if (chdir(dir.data()))
41 throw std::runtime_error(std::string(
"Cannot change directory to ") + dir);
47 : fRandom(new TRandom3(0))
48 , fFlagMarginalize(false)
49 , fLogLevel(
BCLog::nothing)
52 BCLog::OutDetail(
"Prepared Analysis Facility for MTF model \'" + mtf->
GetName() +
"\'");
64 bool flag_data =
false;
67 if (options.find(
"data") < options.size()) {
75 std::vector<TH1D> histograms;
78 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
87 int nbins = hist.GetNbinsX();
90 for (
int ibin = 1; ibin <= nbins; ++ibin) {
92 double expectation = fMTF->
Expectation(ichannel, ibin, parameters);
93 double observation = fRandom->Poisson(expectation);
95 hist.SetBinContent(ibin, observation);
100 histograms.push_back(hist);
113 BCLog::OutDetail(Form(
"MTF Building %d ensembles for %d channels.", nensembles, nchannels));
119 std::vector<double> parameters(nparameters);
122 for (
int i = 0; i < nparameters; ++i) {
123 tree->SetBranchAddress(Form(
"Parameter%i", i), &(parameters[i]));
127 TTree* tree_out =
new TTree(
"ensembles",
"ensembles");
130 std::vector< std::vector<double> > nbins_matrix;
134 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
142 std::vector<double> nbins_column(nbins);
145 nbins_matrix.push_back(nbins_column);
148 std::vector<double> in_parameters(nparameters);
152 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
160 for (
int ibin = 1; ibin <= nbins; ++ibin) {
162 tree_out->Branch(Form(
"channel_%i_bin_%i", ichannel, ibin),
163 &(nbins_matrix[ichannel])[ibin - 1],
"n/D");
167 for (
int i = 0; i < nparameters; ++i) {
168 tree_out->Branch(Form(
"parameter_%i", i), &in_parameters[i], Form(
"parameter_%i/D", i));
172 std::vector<TH1D> histograms;
175 for (
int iensemble = 0; iensemble < nensembles; ++iensemble) {
177 int index = (int) fRandom->Uniform(tree->GetEntries());
178 tree->GetEntry(index);
185 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
193 for (
int ibin = 1; ibin <= nbins; ++ibin) {
195 (nbins_matrix[ichannel])[ibin - 1] = histograms.at(ichannel).GetBinContent(ibin);
200 for (
int i = 0; i < nparameters; ++i) {
201 in_parameters[i] = parameters.at(i);
218 BCLog::OutDetail(Form(
"MTF Building %d ensambles for %d channels.", nensembles, nchannels));
221 TTree* tree =
new TTree(
"ensembles",
"ensembles");
224 std::vector< std::vector<double> > nbins_matrix;
228 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
236 std::vector<double> nbins_column(nbins);
239 nbins_matrix.push_back(nbins_column);
245 std::vector<double> in_parameters(nparameters);
249 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
257 for (
int ibin = 1; ibin <= nbins; ++ibin) {
259 tree->Branch(Form(
"channel_%i_bin_%i", ichannel, ibin),
260 &(nbins_matrix[ichannel])[ibin - 1],
"n/D");
264 for (
int i = 0; i < nparameters; ++i) {
265 tree->Branch(Form(
"parameter_%i", i), &in_parameters[i], Form(
"parameter_%i/D", i));
269 std::vector<TH1D> histograms;
272 for (
int iensemble = 0; iensemble < nensembles; ++iensemble) {
278 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
286 for (
int ibin = 1; ibin <= nbins; ++ibin) {
288 (nbins_matrix[ichannel])[ibin - 1] = histograms.at(ichannel).GetBinContent(ibin);
293 for (
int i = 0; i < nparameters; ++i) {
294 if (parameters.size() > 0)
295 in_parameters[i] = parameters.at(i);
297 in_parameters[i] = 0;
324 BCLog::OutSummary(
"Running ensemble test.");
325 if (fFlagMarginalize) {
326 BCLog::OutSummary(
"Fit for each ensemble is going to be run using MCMC. It can take a while.");
330 bool flag_mc =
false;
333 if (options.find(
"MC") < options.size()) {
343 BCLog::OutSummary(
"No log messages for the ensemble fits are going to be printed.");
345 }
else if (fLogLevel != lls) {
346 BCLog::OutSummary(
"The log level for the ensemble test is set to '" +
BCLog::ToString(fLogLevel) +
"'.");
354 std::vector<TH1D*> histograms_data(nchannels);
357 std::vector< std::vector<double> > nbins_matrix;
361 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
369 std::vector<double> nbins_column(nbins);
372 nbins_matrix.push_back(nbins_column);
376 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
384 for (
int ibin = 1; ibin <= nbins; ++ibin) {
386 tree->SetBranchAddress(Form(
"channel_%i_bin_%i", ichannel, ibin),
387 &(nbins_matrix[ichannel])[ibin - 1]);
395 std::vector<double> out_parameters(nparameters);
397 for (
int i = 0; i < nparameters; ++i) {
398 tree->SetBranchAddress(Form(
"parameter_%i", i), &out_parameters[i]);
403 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
412 TTree* tree_out =
new TTree(
"ensemble_test",
"ensemble test");
415 std::vector<double> out_mode_global(nparameters);
416 std::vector<double> out_std_global(nparameters);
417 std::vector<double> out_mode_marginalized(nparameters);
418 std::vector<double> out_mean_marginalized(nparameters);
419 std::vector<double> out_median_marginalized(nparameters);
420 std::vector<double> out_5quantile_marginalized(nparameters);
421 std::vector<double> out_10quantile_marginalized(nparameters);
422 std::vector<double> out_16quantile_marginalized(nparameters);
423 std::vector<double> out_84quantile_marginalized(nparameters);
424 std::vector<double> out_90quantile_marginalized(nparameters);
425 std::vector<double> out_95quantile_marginalized(nparameters);
426 std::vector<double> out_std_marginalized(nparameters);
427 std::vector<double> out_chi2_generated(nchannels);
428 std::vector<double> out_chi2_mode(nchannels);
429 std::vector<double> out_cash_generated(nchannels);
430 std::vector<double> out_cash_mode(nchannels);
431 std::vector<int> out_nevents(nchannels);
432 double out_chi2_generated_total;
433 double out_chi2_mode_total;
434 double out_cash_generated_total;
435 double out_cash_mode_total;
436 int out_nevents_total;
439 for (
int i = 0; i < nparameters; ++i) {
440 tree_out->Branch(Form(
"parameter_%i", i), &out_parameters[i], Form(
"parameter %i/D", i));
441 tree_out->Branch(Form(
"mode_global_%i", i), &out_mode_global[i], Form(
"global mode of par. %i/D", i));
442 tree_out->Branch(Form(
"std_global_%i", i), &out_std_global[i], Form(
"global std of par. %i/D", i));
443 if (fFlagMarginalize) {
444 tree_out->Branch(Form(
"mode_marginalized_%i", i), &out_mode_marginalized[i], Form(
"marginalized mode of par. %i/D", i));
445 tree_out->Branch(Form(
"mean_marginalized_%i", i), &out_mean_marginalized[i], Form(
"marginalized mean of par. %i/D", i));
446 tree_out->Branch(Form(
"median_marginalized_%i", i), &out_median_marginalized[i], Form(
"median of par. %i/D", i));
447 tree_out->Branch(Form(
"5quantile_marginalized_%i", i), &out_5quantile_marginalized[i], Form(
"marginalized 5 per cent quantile of par. %i/D", i));
448 tree_out->Branch(Form(
"10quantile_marginalized_%i", i), &out_10quantile_marginalized[i], Form(
"marginalized 10 per cent quantile of par. %i/D", i));
449 tree_out->Branch(Form(
"16quantile_marginalized_%i", i), &out_16quantile_marginalized[i], Form(
"marginalized 16 per cent quantile of par. %i/D", i));
450 tree_out->Branch(Form(
"84quantile_marginalized_%i", i), &out_84quantile_marginalized[i], Form(
"marginalized 84 per cent quantile of par. %i/D", i));
451 tree_out->Branch(Form(
"90quantile_marginalized_%i", i), &out_90quantile_marginalized[i], Form(
"marginalized 90 per cent quantile of par. %i/D", i));
452 tree_out->Branch(Form(
"95quantile_marginalized_%i", i), &out_95quantile_marginalized[i], Form(
"marginalized 95 per cent quantile of par. %i/D", i));
453 tree_out->Branch(Form(
"std_marginalized_%i", i), &out_std_marginalized[i], Form(
"marginalized std of par. %i/D", i));
456 for (
int i = 0; i < nchannels; ++i) {
457 tree_out->Branch(Form(
"chi2_generated_%i", i), &out_chi2_generated[i], Form(
"chi2 (generated par.) in channel %i/D", i));
458 tree_out->Branch(Form(
"chi2_mode_%i", i), &out_chi2_mode[i], Form(
"chi2 (mode of par.) in channel %i/D", i));
459 tree_out->Branch(Form(
"cash_generated_%i", i), &out_cash_generated[i], Form(
"cash statistic (generated par.) in channel %i/D", i));
460 tree_out->Branch(Form(
"cash_mode_%i", i), &out_cash_mode[i], Form(
"cash statistic (mode of par.) in channel %i/D", i));
461 tree_out->Branch(Form(
"nevents_%i", i), &out_nevents[i], Form(
"nevents in channel %i/I", i));
463 tree_out->Branch(
"chi2_generated_total", &out_chi2_generated_total,
"chi2 (generated par.) in all channels/D");
464 tree_out->Branch(
"chi2_mode_total", &out_chi2_mode_total,
"chi2 (mode of par.) in all channels/D");
465 tree_out->Branch(
"cash_generated_total", &out_cash_generated_total,
"cash statistic (generated par.) in all channels/D");
466 tree_out->Branch(
"cash_mode_total", &out_cash_mode_total,
"cash statistic (mode of par.) in all channels/D");
467 tree_out->Branch(
"nevents_total", &out_nevents_total,
"total number of events/I");
470 std::vector<TH1D*> histlist(0);
473 for (
int iensemble = 0; iensemble < nensembles; ++iensemble) {
475 if ((iensemble + 1) % 100 == 0 && iensemble > 0) {
477 int frac = int (
double(iensemble + 1) /
double(nensembles) * 100.);
478 BCLog::OutDetail(Form(
"Fraction of ensembles analyzed: %i%%", frac));
484 int index = iensemble + start;
485 tree->GetEntry(index);
488 std::vector<TH1D> histograms;
494 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
512 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
517 for (
unsigned int i = 0; i < ntemplates; ++i) {
521 histlist.push_back(temphist);
536 if (fFlagMarginalize) {
538 BCLog::OutDetail(Form(
"Running MCMC for ensemble %i", iensemble));
554 out_chi2_generated_total = 0;
555 out_chi2_mode_total = 0;
556 out_cash_generated_total = 0;
557 out_cash_mode_total = 0;
558 out_nevents_total = 0;
560 double quantile_probsums[7] = {5e-2, 10e-2, 16e-2, 50e-2, 84e-2, 90e-2, 95e-2};
561 double quantile_values[7];
563 for (
int i = 0; i < nparameters; ++i) {
564 if (fFlagMarginalize) {
570 out_mean_marginalized[i] = hist.
GetHistogram()->GetMean();
571 out_std_marginalized[i] = hist.
GetHistogram()->GetRMS();
573 hist.
GetHistogram()->GetQuantiles(6, quantile_values, quantile_probsums);
574 out_5quantile_marginalized[i] = quantile_values[0];
575 out_10quantile_marginalized[i] = quantile_values[1];
576 out_16quantile_marginalized[i] = quantile_values[2];
577 out_median_marginalized[i] = quantile_values[3];
578 out_84quantile_marginalized[i] = quantile_values[4];
579 out_90quantile_marginalized[i] = quantile_values[5];
580 out_95quantile_marginalized[i] = quantile_values[6];
584 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
592 out_chi2_generated[ichannel] = fMTF->
CalculateChi2( ichannel, out_parameters );
596 out_cash_generated[ichannel] = fMTF->
CalculateCash( ichannel, out_parameters );
600 out_nevents_total += out_nevents[ichannel];
603 out_chi2_generated_total += out_chi2_generated[ichannel];
604 out_chi2_mode_total += out_chi2_mode[ichannel];
607 out_cash_generated_total += out_cash_generated[ichannel];
608 out_cash_mode_total += out_cash_mode[ichannel];
620 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
625 for (
unsigned int i = 0; i < ntemplates; ++i) {
628 TH1D* temphist = histlist.at(ichannel * ntemplates + i);
643 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
657 BCLog::OutSummary(
"Ensemble test ran successfully.");
667 std::vector<TH1D> histograms;
670 int nchannels = matrix.size();;
673 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
678 std::vector<double> nbins_column = matrix[ichannel];
684 int nbins = hist.GetNbinsX();
687 for (
int ibin = 1; ibin <= nbins; ++ibin) {
688 hist.SetBinContent(ibin, nbins_column.at(ibin - 1));
692 histograms.push_back(hist);
703 BCLog::OutSummary(
"Running single channel analysis in directory \'" + dirname +
"\'");
706 mkdir(dirname.data(), 0777);
711 bool flag_syst =
true;
712 bool flag_mcmc =
true;
714 if (options.find(
"nosyst") < options.size())
717 if (options.find(
"mcmc") < options.size())
727 std::vector<bool> flag_channel(nchannels);
730 std::vector<bool> flag_systematic(nsystematics);
733 std::vector<BCMTFComparisonTool*> ctc;
734 std::vector<BCMTFComparisonTool*> ctc_mcmc;
742 for (
int i = 0; i < nparameters; ++ i) {
749 ctc_mcmc.push_back(ct_mcmc);
755 for (
int isystematic = 0; isystematic < nsystematics; ++isystematic) {
763 if (flag_systematic[isystematic])
770 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
802 for (
int i = 0; i < nparameters; ++ i) {
811 ct_mcmc->
AddContribution(
"all channels", hist->GetMean(), hist->GetRMS());
818 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
829 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
855 BCLog::OutSummary(Form(
"Summary for fitting channel %i", ichannel));
861 for (
int i = 0; i < nparameters; ++ i) {
880 for (
int isystematic = 0; isystematic < nsystematics; ++isystematic) {
885 if (flag_systematic[isystematic])
892 for (
int ichannel = 0; ichannel < nchannels; ++ichannel) {
904 TCanvas* c1 =
new TCanvas();
911 c1->Print((std::string(
"overview.pdf") + std::string(
"(")).c_str());
914 for (
int i = 1; i < nparameters - 1; ++ i) {
919 c1->Print(
"overview.pdf");
923 ct = ctc.at(nparameters - 1);
925 c1->Print((std::string(
"overview.pdf") + std::string(
")")).c_str());
929 TCanvas* c2 =
new TCanvas();
936 c2->Print((std::string(
"overview_mcmc.pdf") + std::string(
"(")).c_str());
939 for (
int i = 1; i < nparameters - 1; ++ i) {
944 c2->Print(
"overview_mcmc.pdf");
948 ct_mcmc = ctc_mcmc.at(nparameters - 1);
950 c2->Print((std::string(
"overview_mcmc.pdf") + std::string(
")")).c_str());
955 for (
int i = 0; i < nparameters; ++i) {
970 BCLog::OutSummary(
"Single channel analysis ran successfully");
976 BCLog::OutSummary(
"Running single channel systematic analysis in directory \'" + dirname +
"\'.");
980 mkdir(dirname.data(), 0777);
985 bool flag_mcmc =
true;
987 if (options.find(
"mcmc") < options.size())
997 std::vector<bool> flag_channel(nchannels);
1000 std::vector<bool> flag_systematic(nsystematics);
1003 std::vector<BCMTFComparisonTool*> ctc;
1011 for (
int i = 0; i < nparameters; ++ i) {
1021 for (
int isystematic = 0; isystematic < nsystematics; ++ isystematic) {
1052 for (
int i = 0; i < nparameters; ++ i) {
1064 for (
int isystematic = 0; isystematic < nsystematics; ++isystematic) {
1075 for (
int isystematic = 0; isystematic < nsystematics; ++isystematic) {
1100 BCLog::OutSummary(Form(
"Summary from fitting systematic %i", isystematic));
1106 for (
int i = 0; i < nparameters; ++ i) {
1138 BCLog::OutSummary(
"Summary without systematics");
1142 for (
int i = 0; i < nparameters; ++ i) {
1156 for (
int isystematic = 0; isystematic < nsystematics; ++isystematic) {
1161 if (flag_systematic[isystematic])
1169 TCanvas* c1 =
new TCanvas();
1176 c1->Print((std::string(
"overview.pdf") + std::string(
"(")).c_str());
1179 for (
int i = 1; i < nparameters - 1; ++i) {
1184 c1->Print(
"overview.pdf");
1188 ct = ctc.at(nparameters - 1);
1190 c1->Print((std::string(
"overview.pdf") + std::string(
")")).c_str());
1193 for (
int i = 0; i < nparameters; ++i) {
1205 BCLog::OutSummary(
"Single channel analysis ran successfully");
1211 BCLog::OutSummary(
"Running calibration analysis in directory \'" + dirname +
"\'.");
1215 mkdir(dirname.data(), 0777);
1220 int nvalues = int(parametervalues.size());
1221 for (
int ivalue = 0; ivalue < nvalues; ++ivalue) {
1224 TFile* file = TFile::Open(Form(
"ensemble_%i.root", ivalue),
"RECREATE");
1228 std::vector<double> parameters = default_parameters;
1229 parameters[index] = parametervalues.at(ivalue);
1247 BCLog::OutSummary(
"Calibration analysis ran successfully");
static BCLog::LogLevel GetLogLevelFile()
Returns the minimum log level for file output.
const std::vector< double > & GetBestFitParameterErrors() const
Returns the set of errors on the values of the parameters at the mode.
bool GetFlagChannelActive()
~BCMTFAnalysisFacility()
The default destructor.
void PerformSingleSystematicAnalyses(const std::string &dirname, const std::string &options="")
Perform the analysis using one systematic at a time, without systematic and with all systematics...
int GetNProcesses() const
void PerformCalibrationAnalysis(const std::string &dirname, const std::vector< double > &default_parameters, int index, const std::vector< double > ¶metervalues, int nensembles=1000)
Perform the analysis on pseudo-data generated by varying one of the parameters.
BCParameter & GetParameter(unsigned index)
static void SetLogLevel(BCLog::LogLevel loglevelfile, BCLog::LogLevel loglevelscreen)
Sets the minimum log level for file and screen output.
int MarginalizeAll()
Marginalize all probabilities wrt.
TTree * BuildEnsembles(const std::vector< double > ¶meters, int nensembles, const std::string &options="")
Build ensembles based on a single set of parameters.
std::vector< double > FindMode(std::vector< double > start=std::vector< double >())
Do the mode finding using a method set via SetOptimizationMethod.
void SetHistogram(TH1D *hist, double norm=1)
Set the histogram.
LogLevel
Enumerator for the amount of details to put into the log file.
const std::string & GetName() const
BCMTFTemplate * GetTemplate(int index)
Return a template.
TH1D FluctuateHistogram(const std::string &options="GZ", double norm=1)
Fluctuate the original template histogram by the uncertainty on the bin content.
double Expectation(int channelindex, int binindex, const std::vector< double > ¶meters)
Return the expected number of events for a channel and bin.
void SetFlagSystematicActive(bool flag)
Set a flag defining if this uncertainty is active or not.
void PerformSingleChannelAnalyses(const std::string &dirname, const std::string &options="")
Perform the full set of single channel analyses and the combination.
double CalculateCash(int channelindex, const std::vector< double > ¶meters)
Calculate the Cash statistic for a single channel.
const std::string & GetName() const
unsigned PrintAllMarginalized(const std::string &filename, unsigned hdiv=1, unsigned vdiv=1) const
Print all marginalizations.
BCMTFAnalysisFacility(BCMTF *mtf)
The default constructor.
bool GetFlagSystematicActive() const
int GetNSystematics() const
A class describing a physics channel.
virtual bool Valid() const
Whether histogram has been set and filled.
void SetData(const std::string &channelname, TH1D hist, double minimum=-1, double maximum=-1)
Set the data histogram in a particular channel.
BCMTFTemplate * GetData()
A class for handling 1D distributions.
void SetFlagChannelActive(bool flag)
Set flag to define if the channel is active or not.
BCMTFSystematic * GetSystematic(int index)
void PrintFitSummary()
Print a summary of the fit to the log.
std::vector< TH1D > MatrixToHistograms(const std::vector< std::vector< double > > &matrix)
Transform a matrix to a set of histograms.
TH1 * GetMarginalizedHistogram(const std::string &name) const
Obtain the individual marginalized distributions with respect to one parameter as a ROOT TH1...
BCMTFChannel * GetChannel(int index)
const std::string & GetName() const
virtual void PrintSummary() const
Prints a summary to the logs.
virtual const std::string & GetName() const
double CalculateChi2(int channelindex, const std::vector< double > ¶meters)
Calculate a chi2 for a single channel given a set of parameters.
static std::string ToString(BCLog::LogLevel)
Converts a log level to a string.
static BCLog::LogLevel GetLogLevelScreen()
Returns the minimum log level for screen output.
BCH1D GetMarginalized(const std::string &name) const
Obtain the individual marginalized distributions with respect to one parameter.
unsigned GetNParameters() const
virtual void ResetResults()
Reset all information on the best-fit parameters.
A class for fitting several templates to a data set.
std::vector< TH1D > BuildEnsemble(const std::vector< double > ¶meters, const std::string &options="")
Build a single ensemble based on a single set of parameters.
TTree * PerformEnsembleTest(const std::vector< double > ¶meters, int nensembles, const std::string &options="")
Perform ensemble test based on one set of parameters.
virtual const std::vector< double > & GetBestFitParameters() const
A class for managing log messages.
A class desribing a systematic uncertainty.