BCMath.cxx
1 /*
2  * Copyright (C) 2007-2018, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include "BCMath.h"
12 
13 #include "BCLog.h"
14 
15 #include <Math/PdfFuncMathCore.h>
16 #include <Math/QuantFuncMathCore.h>
17 #include <TMath.h>
18 #include <TRandom3.h>
19 
20 #include <limits>
21 #include <math.h>
22 
23 // ---------------------------------------------------------
24 double BCMath::LogGaus(double x, double mean, double sigma, bool norm)
25 {
26  if (sigma == 0.) // delta function !
27  return (fabs(x - mean) < std::numeric_limits<double>::epsilon()) ? std::numeric_limits<double>::infinity() : 0;
28 
29  if (sigma < 0.) // if sigma is negative use absolute value
30  sigma *= -1.;
31 
32  double arg = (x - mean) / sigma;
33  double result = -.5 * arg * arg;
34 
35  // check if we should add the normalization constant
36  if (!norm)
37  return result;
38 
39  // return result with subtraction of log of normalization constant
40  return result - 0.5 * log(2 * M_PI) - log(sigma);
41 }
42 
43 // ---------------------------------------------------------
44 double BCMath::LogSplitGaus(double x, double mode, double sigma_below, double sigma_above, bool norm)
45 {
46  double norm_const = (norm) ? 0.5 * log(2 / M_PI) - log(sigma_below + sigma_above) : 0;
47  if (x > mode)
48  return LogGaus(x, mode, sigma_above, false) + norm_const;
49  return LogGaus(x, mode, sigma_below, false) + norm_const;
50 }
51 
52 // ---------------------------------------------------------
53 double BCMath::LogPoisson(double x, double lambda)
54 {
55  if (x < 0.0)
56  return -std::numeric_limits<double>::infinity();
57 
58  // for lambda == 0, only x = 0 allowed
59  if (lambda == 0.0) {
60  return (x == 0.0) ? 0.0 : -std::numeric_limits<double>::infinity();
61  }
62 
63  if (lambda < 0.0) {
64  BCLog::OutWarning("BCMath::LogPoisson : expectation value (lambda) cannot be negative.");
65  return std::numeric_limits<double>::quiet_NaN();
66  }
67 
68  if (lambda > 899.0)
69  return LogGaus(x, lambda, sqrt(lambda), true);
70 
71  // now lambda > 0
72  if (x == 0.0)
73  return -lambda;
74 
75  return x * log(lambda) - lambda - ApproxLogFact(x);
76 }
77 
78 // ---------------------------------------------------------
79 double BCMath::LogApproxBinomial(unsigned n, unsigned k, double p)
80 {
81  if (k > n) // impose k < n requirement
82  return std::numeric_limits<double>::quiet_NaN();
83 
84  // p in [0,1]
85  if (p < 0 || p > 1)
86  return std::numeric_limits<double>::quiet_NaN();
87 
88  if (p == 0) // no chance of success
89  return (k == 0) ? 0 : -std::numeric_limits<double>::infinity();
90  if (p == 1) // no chance of failure
91  return (k == n) ? 0 : -std::numeric_limits<double>::infinity();
92 
93  return LogBinomFactor(n, k) + k * log(p) + (n - k) * log(1 - p);
94 }
95 
96 // ---------------------------------------------------------
97 double BCMath::ApproxBinomial(unsigned n, unsigned k, double p)
98 {
99  return exp(LogApproxBinomial(n, k, p));
100 }
101 
102 namespace
103 {
107 static std::vector<double> CachedLogFactorials;
108 }
109 
110 // ---------------------------------------------------------
111 double BCMath::LogBinomFactor(unsigned n, unsigned k)
112 {
113  if (n < k) // impose k < n requirement
114  return std::numeric_limits<double>::quiet_NaN();
115 
116  if (k == 0 || k == n)
117  return 0.;
118  if (k == 1 || k == n - 1)
119  return log((double)n);
120 
121  // if no approximation needed
122  if (n < CachedLogFactorials.size() && (n - k) < 10)
123  return LogBinomFactorExact(n, k);
124 
125  // calculate final log(n over k) using approximations if necessary
126  return ApproxLogFact((double)n) - ApproxLogFact((double)k) - ApproxLogFact((double)(n - k));
127 }
128 
129 // ---------------------------------------------------------
130 double BCMath::ApproxLogFact(double x)
131 {
132  if (x < 0)
133  return std::numeric_limits<double>::quiet_NaN();
134  unsigned n = static_cast<unsigned>(floor(x + 0.5));
135  if (n < CachedLogFactorials.size())
136  return CachedLogFactorials[n];
137  return x * log(x) - x + log(x * (1 + 4 * x * (1 + 2 * x))) / 6 + log(M_PI) / 2;
138 }
139 
140 // ---------------------------------------------------------
141 double BCMath::LogFact(unsigned n)
142 {
143  // return cached value if available
144  if (n < CachedLogFactorials.size())
145  return CachedLogFactorials[n];
146 
147  // calculate factorial starting from the highest cached value
148  double logfact = (CachedLogFactorials.empty()) ? 0 : CachedLogFactorials.back();
149  for (unsigned i = CachedLogFactorials.size(); i <= n; ++i)
150  logfact += log((double)i);
151 
152  return logfact;
153 }
154 
155 // ---------------------------------------------------------
156 unsigned BCMath::CacheFactorials(unsigned n)
157 {
158  if (n < CachedLogFactorials.size())
159  // log factorials have already been cached up to n
160  return CachedLogFactorials.size();
161 
162  // reserve memory
163  CachedLogFactorials.reserve(n);
164 
165  // add log(0!) if not there
166  if (CachedLogFactorials.empty())
167  CachedLogFactorials.push_back(0);
168 
169  // calculate new log factorials
170  for (unsigned i = CachedLogFactorials.size(); i <= n; ++i)
171  CachedLogFactorials.push_back(CachedLogFactorials.back() + log(static_cast<double>(i)));
172 
173  return CachedLogFactorials.size();
174 }
175 
176 namespace
177 {
181 static unsigned __attribute__((unused)) cached_factorials = BCMath::CacheFactorials(899);
182 }
183 
184 // ---------------------------------------------------------
185 int BCMath::Nint(double x)
186 {
187  return static_cast<int>(((x > 0) ? 1 : -1) * floor(fabs(x) + 0.5));
188 }
189 
190 // ---------------------------------------------------------
191 double BCMath::LogBinomFactorExact(unsigned n, unsigned k)
192 {
193  if (n < k) // impose k<n requirement
194  return std::numeric_limits<double>::quiet_NaN();
195 
196  if ( k == 0 || k == n )
197  return 0;
198  if ( k == 1 || k == n - 1)
199  return log((double)n);
200 
201  int lmax = std::max(k, n - k);
202  int lmin = std::min(k, n - k);
203 
204  double log_bf = 0;
205 
206  for (int i = n; i > lmax; --i)
207  log_bf += log((double)i);
208  log_bf -= LogFact(lmin);
209 
210  return log_bf;
211 }
212 
213 // ---------------------------------------------------------
214 double BCMath::LogBreitWignerNonRel(double x, double mean, double Gamma, bool norm)
215 {
216  double bw = log(Gamma) - log((x - mean) * (x - mean) + Gamma * Gamma / 4.);
217 
218  if (norm)
219  bw -= log(2. * M_PI);
220 
221  return bw;
222 }
223 
224 // ---------------------------------------------------------
225 
226 double BCMath::LogBreitWignerRel(double x, double mean, double Gamma)
227 {
228  return -log((x * x - mean * mean) * (x * x - mean * mean) + mean * mean * Gamma * Gamma);
229 }
230 
231 // ---------------------------------------------------------
232 
233 double BCMath::LogChi2(double x, int n)
234 {
235  if (x < 0) {
236  BCLog::OutWarning("BCMath::LogChi2 : parameter cannot be negative!");
237  return std::numeric_limits<double>::quiet_NaN();
238  }
239 
240  if (x == 0 && n == 1) {
241  BCLog::OutWarning("BCMath::LogChi2 : returned value is infinity!");
242  return std::numeric_limits<double>::infinity();
243  }
244 
245  double nOver2 = ((double) n) / 2.;
246 
247  return (nOver2 - 1.) * log(x) - x / 2. - nOver2 * log(2) - log(TMath::Gamma(nOver2));
248 }
249 
250 // ---------------------------------------------------------
251 double BCMath::LogVoigtian(double x, double sigma, double gamma)
252 {
253  if (sigma <= 0 || gamma <= 0) {
254  BCLog::OutWarning("BCMath::LogVoigtian : widths are negative or zero!");
255  return std::numeric_limits<double>::quiet_NaN();
256  }
257 
258  return log(TMath::Voigt(x, sigma, gamma));
259 }
260 
261 // ---------------------------------------------------------
262 double BCMath::LogGammaPDF(double x, double alpha, double beta)
263 {
264  return alpha * log(beta) - TMath::LnGamma(alpha) + (alpha - 1) * log(x) - beta * x;
265 }
266 
267 // ---------------------------------------------------------
268 double BCMath::LogLogNormal(double x, double mean, double sigma)
269 {
270  // if we have a delta function, return fixed value
271  if (sigma == 0.)
272  return 0;
273 
274  // if sigma is negative use absolute value
275  if (sigma < 0.)
276  sigma *= -1.;
277 
278  double arg = (log(x) - mean) / sigma;
279  double result = -.5 * arg * arg;
280 
281  return result - log(x * sqrt(2. * M_PI) * sigma);
282 }
283 
284 // ---------------------------------------------------------
285 double BCMath::CorrectPValue(const double& pvalue, const unsigned& npar, const unsigned& nobservations)
286 {
287  // avoid pathologies
288  if (pvalue < 0 || pvalue > 1)
289  throw std::domain_error(Form("BCMath::CorrectPValue: pvalue (%g) out of range", pvalue));
290 
291  if (pvalue < std::numeric_limits<double>::epsilon())
292  return 0;
293 
294  // dof = nobs - npar
295  if (npar >= nobservations)
296  throw std::domain_error(Form("BCMath::CorrectPValue: "
297  "npar exceeds nobservations, %u vs %u", npar, nobservations));
298 
299  // convert upper-tail p value to a chi squared
300  const double chi2 = ROOT::Math::chisquared_quantile_c(pvalue, nobservations);
301 
302  // corrected degrees of freedom
303  const unsigned dof = nobservations - npar;
304 
305  // transform back to p value
306  return TMath::Prob(chi2, dof);
307 }
308 
309 // ---------------------------------------------------------
310 double BCMath::FastPValue(const std::vector<unsigned>& observed, const std::vector<double>& expected,
311  unsigned nIterations, unsigned seed)
312 {
313  size_t nbins = observed.size();
314  if (nbins != expected.size()) {
315  throw std::invalid_argument(Form("BCMath::FastPValue: "
316  "size of expected and observed do not match, %u vs %u", unsigned(expected.size()), unsigned(nbins)));
317  }
318 
319  // temporary histogram to be modified in each MCMC step
320  std::vector<unsigned> histogram(nbins, 0);
321 
322  // fix seed to iterations for reproducible results
323  TRandom3 rng(seed);
324 
325  // keep track of log of probability and count data sets with larger value
326  double logp = 0;
327  double logp_start = 0;
328  int counter_pvalue = 0;
329 
330  // define starting distribution as histogram with most likely entries
331  for (size_t ibin = 0; ibin < nbins; ++ibin) {
332 
333  // get the number of expected events
334  double yexp = expected[ibin];
335 
336  //most likely observed value = int(expected value)
337  histogram[ibin] = size_t(yexp);
338 
339  // calculate test statistic (= likelihood of entire histogram) for starting distr.
340  logp += LogPoisson(size_t(yexp), yexp);
341 
342  //statistic of the observed data set
343  logp_start += LogPoisson(observed[ibin], yexp);
344  }
345 
346  // loop over iterations
347  for (unsigned iiter = 0; iiter < nIterations; ++iiter) {
348  // loop over bins
349  for (size_t ibin = 0; ibin < nbins; ++ibin) {
350  // random step up or down in statistics for this bin
351  double ptest = rng.Rndm() - 0.5;
352 
353  // increase statistics by 1
354  if (ptest > 0) {
355  // calculate factor of probability
356  double r = expected[ibin] / double(histogram[ibin] + 1);
357 
358  // walk, or don't (this is the Metropolis part)
359  if (rng.Rndm() < r) {
360  ++histogram[ibin];
361  logp += log(r);
362  }
363  }
364 
365  // decrease statistics by 1
366  else if (ptest <= 0 && histogram[ibin] > 0) {
367  // calculate factor of probability
368  double r = double(histogram[ibin]) / expected[ibin];
369 
370  // walk, or don't (this is the Metropolis part)
371  if (rng.Rndm() < r) {
372  --histogram[ibin];
373  logp += log(r);
374  }
375  }
376  } // end of looping over bins
377 
378  // increase counter
379  if (logp <= logp_start)
380  ++counter_pvalue;
381  // handle the case where a -b +b > a because of precision loss
382  else if (logp_start && logp != logp_start && fabs((logp - logp_start) / logp_start) < 1e-15)
383  ++counter_pvalue;
384 
385  } // end of looping over iterations
386 
387  // calculate p-value
388  return double(counter_pvalue) / nIterations;
389 }
390 
391 double BCMath::Random::Chi2(TRandom* rng, double dof)
392 {
393  return 2.0 * Gamma(rng, dof / 2.0, 1.0);
394 }
395 
396 double BCMath::Random::Gamma(TRandom* rng, double a, double b)
397 {
398  /* assume a > 0 */
399 
400  if (a < 1) {
401  // u in ]0,1[
402  double u = rng->Uniform(1);
403  return Gamma(rng, 1.0 + a, b) * std::pow(u, 1.0 / a);
404  }
405 
406  double x, v, u;
407  double d = a - 1.0 / 3.0;
408  double c = (1.0 / 3.0) / sqrt (d);
409 
410  while (1) {
411  do {
412  // unit Gauss
413  x = rng->Gaus();
414  v = 1.0 + c * x;
415  } while (v <= 0);
416 
417  v = v * v * v;
418  u = rng->Uniform(1);
419 
420  if (u < 1 - 0.0331 * x * x * x * x)
421  break;
422 
423  if (log (u) < 0.5 * x * x + d * (1 - v + log (v)))
424  break;
425  }
426 
427  return b * d * v;
428 }
double LogApproxBinomial(unsigned n, unsigned k, double p)
Calculates natural logarithm of the Binomial probability using approximations for factorial calculati...
Definition: BCMath.cxx:79
double LogFact(unsigned n)
Calculates natural logarithm of the n-factorial (n!)
Definition: BCMath.cxx:141
double LogBinomFactorExact(unsigned n, unsigned k)
Calculates natural logarithm of the Binomial factor "n over k".
Definition: BCMath.cxx:191
double FastPValue(const std::vector< unsigned > &observed, const std::vector< double > &expected, unsigned nIterations=1e5, unsigned seed=0)
Calculate the p value using fast MCMC for a histogram and the likelihood as test statistic.
Definition: BCMath.cxx:310
double LogBreitWignerNonRel(double x, double mean, double Gamma, bool norm=false)
Calculates the logarithm of the nonrelativistic Breit-Wigner distribution.
Definition: BCMath.cxx:214
double LogBreitWignerRel(double x, double mean, double Gamma)
Calculates the logarithm of the relativistic Breit-Wigner distribution.
Definition: BCMath.cxx:226
double LogBinomFactor(unsigned n, unsigned k)
Calculates natural logarithm of the Binomial factor "n over k" using approximations for factorial cal...
Definition: BCMath.cxx:111
double LogLogNormal(double x, double mean=0, double sigma=1)
Return the log of the log normal distribution.
Definition: BCMath.cxx:268
double LogChi2(double x, int n)
Calculates the logarithm of chi square function: chi2(double x; size_t n)
Definition: BCMath.cxx:233
double LogGammaPDF(double x, double alpha, double beta)
Returns the log of the Gamma PDF.
Definition: BCMath.cxx:262
double CorrectPValue(const double &pvalue, const unsigned &npar, const unsigned &nobservations)
Correct a p value by transforming to a chi^2 with dof=nobservations, then back to a pvalue with dof r...
Definition: BCMath.cxx:285
double ApproxLogFact(double x)
Calculates natural logarithm of the n-factorial (n!) using Srinivasa Ramanujan approximation log(n!) ...
Definition: BCMath.cxx:130
double LogVoigtian(double x, double sigma, double gamma)
Calculates the logarithm of normalized voigtian function: voigtian(double x, double sigma...
Definition: BCMath.cxx:251
int Nint(double x)
Returns the nearest integer of a double number.
Definition: BCMath.cxx:185
double LogSplitGaus(double x, double mode, double sigma_below, double sigma_above, bool norm=false)
Calculate the natural logarithm of a normal distribution function with different variances below and ...
Definition: BCMath.cxx:44
double LogGaus(double x, double mean=0, double sigma=1, bool norm=false)
Calculate the natural logarithm of a normal distribution function.
Definition: BCMath.cxx:24
double ApproxBinomial(unsigned n, unsigned k, double p)
Calculates Binomial probability using approximations for factorial calculations if calculation for nu...
Definition: BCMath.cxx:97
double LogPoisson(double x, double lambda)
Calculate the natural logarithm of a poisson distribution.
Definition: BCMath.cxx:53
unsigned CacheFactorials(unsigned n)
Cache factorials for first.
Definition: BCMath.cxx:156