23 #include <Math/PdfFuncMathCore.h>
24 #include <Math/QuantFuncMathCore.h>
28 static unsigned int nCacheFact = 10000;
29 static double * logfact = 0;
33 double LogGaus(
double x,
double mean,
double sigma,
bool norm)
43 double arg = (x - mean) / sigma;
44 double result = -.5 * arg * arg;
52 return result - log(sqrt(2. * M_PI) * sigma);
60 return LogGaus(x, par, sqrt(par),
true);
96 return LogBinomFactor(n, k) + (double) k * log(p) + (double) (n - k) * log(1. - p);
110 if (n == k || k == 0)
112 if (k == 1 || k == n - 1)
113 return log((
double) n);
116 if ( n < BCMATH_NFACT_ALIMIT || (n < (
int) BCMath::nCacheFact && (n - k) < 10) )
127 if (x > BCMath::nCacheFact)
128 return x * log(x) - x + log(x * (1. + 4. * x * (1. + 2. * x))) / 6. + log(M_PI) / 2.;
131 return LogFact(static_cast<int>(floor(x + 0.5f)));
139 return std::numeric_limits<double>::quiet_NaN();
142 if ( !BCMath::logfact) {
143 BCMath::logfact =
new double[BCMath::nCacheFact+1];
144 double tmplogfact = 0;
145 BCMath::logfact[0] = tmplogfact;
146 for (
unsigned int i=1; i<=BCMath::nCacheFact; i++) {
147 tmplogfact += log((
double) i);
148 BCMath::logfact[i] = tmplogfact;
153 if (n <= (
int) BCMath::nCacheFact)
154 return BCMath::logfact[n];
159 ln = BCMath::logfact[nCacheFact];
161 for (
int i = BCMath::nCacheFact+1; i <= n; i++)
162 ln += log((
double) i);
185 if (n == k || k == 0)
187 if (k == 1 || k == n - 1)
188 return log((
double) n);
190 int lmax = std::max(k, n - k);
191 int lmin = std::min(k, n - k);
195 for (
int i = n; i > lmax; i--)
196 ln += log((
double) i);
211 if (x + .5 == (
double) i && (i&1))
216 if (x - 0.5 ==
double(i) && (i&1))
227 double bw = log(Gamma) - log((x - mean) * (x - mean) + Gamma*Gamma / 4.);
230 bw -= log(2. * M_PI);
237 double LogBreitWignerRel(
double x,
double mean,
double Gamma)
239 return -log((x*x - mean*mean) * (x*x - mean*mean) + mean*mean * Gamma*Gamma);
247 BCLog::OutWarning(
"BCMath::LogChi2 : parameter cannot be negative!");
251 if (x == 0 && n == 1) {
252 BCLog::OutWarning(
"BCMath::LogChi2 : returned value is infinity!");
256 double nOver2 = ((double) n) / 2.;
258 return (nOver2 - 1.) * log(x) - x / 2. - nOver2 * log(2) - log(TMath::Gamma(nOver2));
264 if (sigma <= 0 || gamma <= 0) {
265 BCLog::OutWarning(
"BCMath::LogVoigtian : widths are negative or zero!");
269 return log(TMath::Voigt(x, sigma, gamma));
275 return alpha * log(beta) - TMath::LnGamma(alpha) + (alpha - 1) * log(x) - beta * x;
289 double arg = (log(x) - mean) / sigma;
290 double result = -.5 * arg * arg;
292 return result - log(x * sqrt(2. * M_PI) * sigma);
298 double mean = par[0];
299 double sigmadown = par[1];
300 double sigmaup = par[2];
302 double sigma = sigmadown;
307 return 1.0/sqrt(2.0*M_PI)/sigma * exp(- (x[0]-mean)*(x[0]-mean)/2./sigma/sigma);
312 double chi2(
double *x,
double *par)
314 return ROOT::Math::chisquared_pdf(x[0], par[0]);
321 TF1 *f =
new TF1(
"chi2", chi2, 0.0, 1000, 1);
322 f->SetParameter(0, K);
326 for (
unsigned int i = 0; i < randoms.size(); i++)
327 randoms.at(i) = f->GetRandom();
333 TH1D *
ECDF(
const std::vector<double> & data)
337 std::set<double> uniqueObservations;
339 for (
int i = 0; i < N; ++i)
340 uniqueObservations.insert(data[i]);
343 unsigned nUnique = uniqueObservations.size();
344 std::vector<double> lowerEdges(nUnique);
347 std::set<double>::iterator iter;
349 for (iter = uniqueObservations.begin(); iter != uniqueObservations.end(); ++iter) {
350 lowerEdges[counter] = *iter;
357 TH1D *
ECDF =
new TH1D(
"ECDF",
"Empirical cumulative distribution function", nUnique - 1, &lowerEdges[0]);
360 for (
int i = 0; i < N; ++i)
361 ECDF -> Fill(data[i]);
364 ECDF -> SetBinContent(0, 0.0);
367 for (
int nBin = 1; nBin <= ECDF->GetNbinsX(); nBin++) {
368 double previousBin = ECDF -> GetBinContent(nBin - 1);
369 double thisBin = ECDF -> GetBinContent(nBin) / double(N);
370 ECDF -> SetBinContent(nBin, thisBin + previousBin);
373 ECDF -> SetBinError(nBin, 0.0);
377 ECDF -> SetBinContent(ECDF->GetNbinsX() + 1, 1.);
380 ECDF -> SetMinimum(0.);
381 ECDF -> SetMaximum(1.);
391 unsigned int maxRunAbove, maxRunBelow, currRun;
396 std::vector<int> runs(2, 0);
398 if (bitStream.empty())
402 bool aboveRun = bitStream.at(0);
405 std::vector<bool>::const_iterator iter = bitStream.begin();
407 while (iter != bitStream.end()) {
410 if (*(iter - 1) == *iter)
415 maxRunAbove = std::max(maxRunAbove, currRun);
417 maxRunBelow = std::max(maxRunBelow, currRun);
419 aboveRun = !aboveRun;
429 maxRunAbove = std::max(maxRunAbove, currRun);
431 maxRunBelow = std::max(maxRunBelow, currRun);
434 runs.at(0) = maxRunBelow;
435 runs.at(1) = maxRunAbove;
442 const std::vector<double>& yMeasured,
443 const std::vector<double>& yExpected,
const std::vector<double>& sigma)
446 double maxRunAbove, maxRunBelow, currRun;
451 std::vector<double> runs(2, 0);
454 if (yMeasured.size() != yExpected.size() || yMeasured.size() != sigma.size()
455 || yExpected.size() != sigma.size()) {
463 int N = yMeasured.size();
470 double residue = (yMeasured.at(0) - yExpected.at(0)) / sigma.at(0);
471 bool aboveRun = residue >= 0 ?
true :
false;
472 currRun = residue * residue;
475 for (
int i = 1; i < N; i++) {
476 residue = (yMeasured.at(i) - yExpected.at(i)) / sigma.at(i);
478 if ((residue >= 0) == aboveRun) {
479 currRun += residue * residue;
483 maxRunAbove = std::max(maxRunAbove, currRun);
485 maxRunBelow = std::max(maxRunBelow, currRun);
487 aboveRun = !aboveRun;
489 currRun = residue * residue;
495 maxRunAbove = std::max(maxRunAbove, currRun);
497 maxRunBelow = std::max(maxRunBelow, currRun);
500 runs.at(0) = maxRunBelow;
501 runs.at(1) = maxRunAbove;
506 double longestRunFrequency(
unsigned longestObserved,
unsigned int nTrials)
509 if (longestObserved >= nTrials)
516 typedef unsigned int uint;
517 uint Lobs = longestObserved;
521 double conditionalProb;
529 for (uint r = 0; r <= n; r++) {
530 conditionalProb = 0.0;
532 for (uint i = 1; (i <= n - r + 1) && (i <= uint(r /
double(Lobs + 1))); i++) {
537 conditionalProb += exp(tempLog);
539 conditionalProb -= exp(tempLog);
541 prob += (1 + n - r) * conditionalProb;
545 prob *= pow(2., -
double(n));
551 double Rvalue(
const std::vector<double> & chain_means,
const std::vector<double> & chain_variances,
552 const unsigned & chain_length,
const bool & strict)
throw (std::invalid_argument, std::domain_error)
554 if (chain_means.size() != chain_variances.size())
555 throw std::invalid_argument(
"BCMath::RValue: chain means and chain variances are not aligned!");
557 const double n = chain_length;
558 const double m = chain_means.size();
561 throw std::invalid_argument(
"BCMath:RValue: Need at least two chains to compute R-value!");
564 double variance_of_means = 0.0;
565 double mean_of_means = 0.0;
566 double mean_of_variances = 0.0;
567 double variance_of_variances = 0.0;
570 double previous_mean_of_means = 0;
571 double previous_mean_of_variances = 0;
572 for (
unsigned i = 0 ; i < m ; ++i)
576 mean_of_means = chain_means.front();
577 variance_of_means = 0;
578 mean_of_variances = chain_variances.front();
579 variance_of_variances = 0;
585 previous_mean_of_means = mean_of_means;
586 previous_mean_of_variances = mean_of_variances;
589 mean_of_means += (chain_means[i] - previous_mean_of_means) / (i + 1.0);
590 variance_of_means += (chain_means[i] - previous_mean_of_means) * (chain_means[i] - mean_of_means);
591 mean_of_variances += (chain_variances[i] - previous_mean_of_variances) / (i + 1.0);
592 variance_of_variances += (chain_variances[i] - previous_mean_of_variances) * (chain_variances[i] - mean_of_variances);
595 variance_of_means /= m - 1.0;
596 variance_of_variances /= m - 1.0;
599 double B = variance_of_means * n;
600 double W = mean_of_variances;
601 double sigma_squared = (n - 1.0) / n * W + B / n;
604 if (0.0 == W && 0.0 == B)
606 BCLog::OutDebug(
"BCMath::Rvalue: All samples in all chains identical!");
612 BCLog::OutDebug(
"BCMath::Rvalue: W = 0. Avoiding R = NaN.");
613 return std::numeric_limits<double>::max();
618 return sqrt(sigma_squared / W);
624 double covariance_22 = 0.0;
625 double covariance_21 = 0.0;
627 for (
unsigned i = 0 ; i < m ; ++i)
629 covariance_21 += (chain_variances[i] - mean_of_variances) * (chain_means.at(i) - mean_of_means);
630 covariance_22 += (chain_variances[i] - mean_of_variances) * (chain_means[i] * chain_means[i] - mean_of_means * mean_of_means);
633 covariance_21 /= m - 1.0;
634 covariance_22 /= m - 1.0;
637 double V = sigma_squared + B / (m * n);
640 double a = (n - 1.0) * (n - 1.0) / (n * n * m) * variance_of_variances;
641 double b = (m + 1) * (m + 1) / (m * n * m * n) * 2.0 / (m - 1) * B * B;
642 double c = 2.0 * (m + 1.0) * (n - 1.0) / (m * n * n) * n / m * (covariance_22 - 2.0 * mean_of_means * covariance_21);
643 double variance_of_V = a + b + c;
646 double df = 2.0 * V * V / variance_of_V;
650 BCLog::OutDebug(Form(
"BCMath::Rvalue: DoF (%g) below 2. Avoiding R = NaN.", df));
651 return std::numeric_limits<double>::max();;
655 R = sqrt(V / W * df / (df - 2.0));
658 if (R < 0.99 && n > 100)
659 throw std::domain_error(Form(
"BCMath::Rvalue: %g < 0.99. Check for a bug in the implementation!", R));
665 double CorrectPValue(
const double & pvalue,
const unsigned & npar,
const unsigned & nobservations)
throw (std::domain_error)
668 if (pvalue < 0 or pvalue > 1)
669 throw std::domain_error(Form(
"BCMath::CorrectPValue: pvalue (%g) out of range", pvalue));
675 if (npar >= nobservations)
676 throw std::domain_error(Form(
"BCMath::CorrectPValue: "
677 "npar exceeds nobservations, %u vs %u", npar, nobservations));
680 const double chi2 = ROOT::Math::chisquared_quantile_c(pvalue, nobservations);
683 const unsigned dof = nobservations - npar;
686 return TMath::Prob(chi2, dof);
690 double FastPValue(
const std::vector<unsigned> & observed,
const std::vector<double> & expected,
691 unsigned nIterations,
unsigned seed)
throw (std::invalid_argument)
693 size_t nbins = observed.size();
694 if (nbins != expected.size())
696 throw std::invalid_argument(Form(
"BCMath::FastPValue: "
697 "size of expected and observed do not match, %u vs %u",
unsigned(expected.size()),
unsigned(nbins)));
701 std::vector<unsigned> histogram(nbins, 0);
708 double logp_start = 0;
709 int counter_pvalue = 0;
712 for (
size_t ibin = 0; ibin < nbins; ++ibin) {
715 double yexp = expected[ibin];
718 histogram[ibin] = size_t(yexp);
724 logp_start +=
LogPoisson(observed[ibin], yexp);
728 for (
unsigned iiter = 0; iiter < nIterations; ++iiter) {
730 for (
size_t ibin = 0; ibin < nbins; ++ibin) {
732 double ptest = rng.Rndm() - 0.5;
737 double r = expected[ibin] / double(histogram[ibin] + 1);
740 if (rng.Rndm() < r) {
747 else if (ptest <= 0 && histogram[ibin] > 0) {
749 double r = double(histogram[ibin]) / expected[ibin];
752 if (rng.Rndm() < r) {
760 if (logp <= logp_start)
763 else if (logp_start && logp != logp_start && fabs((logp - logp_start) / logp_start) < 1e-15)
769 return double(counter_pvalue) / nIterations;
double LogLogNormal(double x, double mean, double sigma)
double FastPValue(const std::vector< unsigned > &observed, const std::vector< double > &expected, unsigned nIterations, unsigned seed)
double SplitGaussian(double *x, double *par)
double LogBreitWignerNonRel(double x, double mean, double Gamma, bool norm)
double LogNoverK(int n, int k)
double LogChi2(double x, int n)
double LogGammaPDF(double x, double alpha, double beta)
double CorrectPValue(const double &pvalue, const unsigned &npar, const unsigned &nobservations)
std::vector< int > longestRuns(const std::vector< bool > &bitStream)
double LogBinomFactor(int n, int k)
double ApproxLogFact(double x)
double LogVoigtian(double x, double sigma, double gamma)
void RandomChi2(std::vector< double > &randoms, int K)
void CacheFactorial(unsigned int n)
double LogGaus(double x, double mean, double sigma, bool norm)
double Rvalue(const std::vector< double > &chain_means, const std::vector< double > &chain_variances, const unsigned &chain_length, const bool &strict)
double ApproxBinomial(int n, int k, double p)
double LogApproxBinomial(int n, int k, double p)
double LogPoisson(double x, double par)
std::vector< double > longestRunsChi2(const std::vector< double > &yMeasured, const std::vector< double > &yExpected, const std::vector< double > &sigma)
TH1D * ECDF(const std::vector< double > &data)