00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "BCMath.h"
00011 #include "BCLog.h"
00012
00013 #include <math.h>
00014 #include <limits>
00015 #include <set>
00016
00017 #include <TMath.h>
00018 #include <TF1.h>
00019 #include <TH1D.h>
00020
00021 #include <Math/PdfFuncMathCore.h>
00022
00023 namespace BCMath {
00024
00025 static unsigned int nCacheFact = 10000;
00026 static double * logfact = 0;
00027
00028
00029
00030 double LogGaus(double x, double mean, double sigma, bool norm)
00031 {
00032
00033 if (sigma == 0.)
00034 return 0;
00035
00036
00037 if (sigma < 0.)
00038 sigma *= -1.;
00039
00040 double arg = (x - mean) / sigma;
00041 double result = -.5 * arg * arg;
00042
00043
00044 if (!norm)
00045 return result;
00046
00047
00048
00049 return result - log(sqrt(2. * M_PI) * sigma);
00050 }
00051
00052
00053
00054 double LogPoisson(double x, double par)
00055 {
00056 if (par > 899)
00057 return LogGaus(x, par, sqrt(par), true);
00058
00059 if (x < 0)
00060 return 0;
00061
00062 if (x == 0.)
00063 return -par;
00064
00065 return x * log(par) - par - ApproxLogFact(x);
00066 }
00067
00068
00069
00070 double ApproxBinomial(int n, int k, double p)
00071 {
00072 return exp(LogApproxBinomial(n, k, p));
00073 }
00074
00075
00076
00077 double LogApproxBinomial(int n, int k, double p)
00078 {
00079
00080 if (p == 0)
00081 return -1e99;
00082
00083 else if (p == 1)
00084 return 0;
00085
00086
00087 if (n < k) {
00088 int a = n;
00089 n = k;
00090 k = a;
00091 }
00092
00093 return LogBinomFactor(n, k) + (double) k * log(p) + (double) (n - k) * log(1. - p);
00094 }
00095
00096
00097
00098 double LogBinomFactor(int n, int k)
00099 {
00100
00101 if (n < k) {
00102 int a = n;
00103 n = k;
00104 k = a;
00105 }
00106
00107 if (n == k || k == 0)
00108 return 0.;
00109 if (k == 1 || k == n - 1)
00110 return log((double) n);
00111
00112
00113 if ( n < BCMATH_NFACT_ALIMIT || (n < (int) BCMath::nCacheFact && (n - k) < 10) )
00114 return LogNoverK(n, k);
00115
00116
00117 return ApproxLogFact((double)n) - ApproxLogFact((double)k) - ApproxLogFact((double)(n - k));
00118 }
00119
00120
00121
00122 double ApproxLogFact(double x)
00123 {
00124 if (x > BCMath::nCacheFact)
00125 return x * log(x) - x + log(x * (1. + 4. * x * (1. + 2. * x))) / 6. + log(M_PI) / 2.;
00126
00127 else
00128 return LogFact((int) x);
00129 }
00130
00131
00132 double LogFact(int n)
00133 {
00134
00135 if (n<0)
00136 return std::numeric_limits<double>::quiet_NaN();
00137
00138
00139 if ( !BCMath::logfact) {
00140 BCMath::logfact = new double[BCMath::nCacheFact+1];
00141 double tmplogfact = 0;
00142 BCMath::logfact[0] = tmplogfact;
00143 for (unsigned int i=1; i<=BCMath::nCacheFact; i++) {
00144 tmplogfact += log((double) i);
00145 BCMath::logfact[i] = tmplogfact;
00146 }
00147 }
00148
00149
00150 if (n <= (int) BCMath::nCacheFact)
00151 return BCMath::logfact[n];
00152
00153
00154 double ln(0.);
00155 if (BCMath::logfact)
00156 ln = BCMath::logfact[nCacheFact];
00157
00158 for (int i = BCMath::nCacheFact+1; i <= n; i++)
00159 ln += log((double) i);
00160
00161 return ln;
00162 }
00163
00164
00165
00166 void CacheFactorial(unsigned int n)
00167 {
00168 nCacheFact = n;
00169 }
00170
00171
00172
00173 double LogNoverK(int n, int k)
00174 {
00175
00176 if (n < k) {
00177 int a = n;
00178 n = k;
00179 k = a;
00180 }
00181
00182 if (n == k || k == 0)
00183 return 0.;
00184 if (k == 1 || k == n - 1)
00185 return log((double) n);
00186
00187 int lmax = Max(k, n - k);
00188 int lmin = Min(k, n - k);
00189
00190 double ln = 0.;
00191
00192 for (int i = n; i > lmax; i--)
00193 ln += log((double) i);
00194 ln -= LogFact(lmin);
00195
00196 return ln;
00197 }
00198
00199
00200
00201 int Nint(double x)
00202 {
00203
00204 int i;
00205
00206 if (x >= 0) {
00207 i = (int) (x + .5);
00208 if (x + .5 == (double) i && (i&1))
00209 i--;
00210 }
00211 else {
00212 i = int(x - 0.5);
00213 if (x - 0.5 == double(i) && (i&1))
00214 i++;
00215 }
00216
00217 return i;
00218 }
00219
00220
00221
00222 double rms(int n, const double *a)
00223 {
00224 if (n <= 0 || !a)
00225 return 0;
00226
00227 double sum = 0., sum2 = 0.;
00228
00229 for (int i = 0; i < n; i++) {
00230 sum += a[i];
00231 sum2 += a[i] * a[i];
00232 }
00233
00234 double n1 = 1. / (double) n;
00235 double mean = sum * n1;
00236
00237 return sqrt(fabs(sum2 * n1 - mean * mean));
00238 }
00239
00240
00241
00242 double LogBreitWignerNonRel(double x, double mean, double Gamma, bool norm)
00243 {
00244 double bw = log(Gamma) - log((x - mean) * (x - mean) + Gamma*Gamma / 4.);
00245
00246 if (norm)
00247 bw -= log(2. * M_PI);
00248
00249 return bw;
00250 }
00251
00252
00253
00254 double LogBreitWignerRel(double x, double mean, double Gamma)
00255 {
00256 return -log((x*x - mean*mean) * (x*x - mean*mean) + mean*mean * Gamma*Gamma);
00257 }
00258
00259
00260
00261 double LogChi2(double x, int n)
00262 {
00263 if (x < 0) {
00264 BCLog::OutWarning("BCMath::LogChi2 : parameter cannot be negative!");
00265 return -1e99;
00266 }
00267
00268 if (x == 0 && n == 1) {
00269 BCLog::OutWarning("BCMath::LogChi2 : returned value is infinity!");
00270 return 1e99;
00271 }
00272
00273 double nOver2 = ((double) n) / 2.;
00274
00275 return (nOver2 - 1.) * log(x) - x / 2. - nOver2 * log(2) - log(TMath::Gamma(nOver2));
00276 }
00277
00278
00279 double LogVoigtian(double x, double sigma, double gamma)
00280 {
00281 if (sigma <= 0 || gamma <= 0) {
00282 BCLog::OutWarning("BCMath::LogVoigtian : widths are negative or zero!");
00283 return -1e99;
00284 }
00285
00286 return log(TMath::Voigt(x, sigma, gamma));
00287 }
00288
00289
00290 double SplitGaussian(double* x, double* par)
00291 {
00292 double mean = par[0];
00293 double sigmadown = par[1];
00294 double sigmaup = par[2];
00295
00296 double sigma = sigmadown;
00297
00298 if (x[0] > mean)
00299 sigma = sigmaup;
00300
00301 return 1.0/sqrt(2.0*M_PI)/sigma * exp(- (x[0]-mean)*(x[0]-mean)/2./sigma/sigma);
00302 }
00303
00304
00305
00306 double chi2(double *x, double *par)
00307 {
00308 return ROOT::Math::chisquared_pdf(x[0], par[0]);
00309 }
00310
00311
00312 void RandomChi2(std::vector<double> &randoms, int K)
00313 {
00314
00315 TF1 *f = new TF1("chi2", chi2, 0.0, 1000, 1);
00316 f->SetParameter(0, K);
00317 f->SetNpx(500);
00318
00319
00320 for (unsigned int i = 0; i < randoms.size(); i++)
00321 randoms.at(i) = f->GetRandom();
00322
00323 delete f;
00324 }
00325
00326
00327 TH1D * ECDF(const std::vector<double> & data)
00328 {
00329 int N = data.size();
00330
00331 std::set<double> uniqueObservations;
00332
00333 for (int i = 0; i < N; ++i)
00334 uniqueObservations.insert(data[i]);
00335
00336
00337 unsigned nUnique = uniqueObservations.size();
00338 std::vector<double> lowerEdges(nUnique);
00339
00340
00341 std::set<double>::iterator iter;
00342 int counter = 0;
00343 for (iter = uniqueObservations.begin(); iter != uniqueObservations.end(); ++iter) {
00344 lowerEdges[counter] = *iter;
00345 counter++;
00346 }
00347
00348
00349
00350
00351 TH1D * ECDF = new TH1D("ECDF", "Empirical cumulative distribution function", nUnique - 1, &lowerEdges[0]);
00352
00353
00354 for (int i = 0; i < N; ++i)
00355 ECDF -> Fill(data[i]);
00356
00357
00358 ECDF -> SetBinContent(0, 0.0);
00359
00360
00361 for (int nBin = 1; nBin <= ECDF->GetNbinsX(); nBin++) {
00362 double previousBin = ECDF -> GetBinContent(nBin - 1);
00363
00364
00365 double thisBin = ECDF -> GetBinContent(nBin) / double(N);
00366 ECDF -> SetBinContent(nBin, thisBin + previousBin);
00367
00368
00369 ECDF -> SetBinError(nBin, 0.0);
00370 }
00371
00372
00373 ECDF -> SetBinContent(ECDF->GetNbinsX() + 1, 1.);
00374
00375
00376 ECDF -> SetMinimum(0.);
00377 ECDF -> SetMaximum(1.);
00378
00379 return ECDF;
00380 }
00381
00382
00383
00384 std::vector<int> longestRuns(const std::vector<bool> &bitStream)
00385 {
00386
00387 unsigned int maxRunAbove, maxRunBelow, currRun;
00388 maxRunAbove = 0;
00389 maxRunBelow = 0;
00390 currRun = 1;
00391
00392 std::vector<int> runs(2, 0);
00393
00394 if (bitStream.empty())
00395 return runs;
00396
00397
00398 bool aboveRun = bitStream.at(0);
00399
00400
00401 std::vector<bool>::const_iterator iter = bitStream.begin();
00402 ++iter;
00403 while (iter != bitStream.end()) {
00404
00405
00406 if (*(iter - 1) == *iter)
00407 currRun++;
00408 else {
00409
00410 if (aboveRun)
00411 maxRunAbove = Max(maxRunAbove, currRun);
00412 else
00413 maxRunBelow = Max(maxRunBelow, currRun);
00414
00415 aboveRun = !aboveRun;
00416
00417 currRun = 1;
00418 }
00419
00420 ++iter;
00421 }
00422
00423
00424 if (aboveRun)
00425 maxRunAbove = Max(maxRunAbove, currRun);
00426 else
00427 maxRunBelow = Max(maxRunBelow, currRun);
00428
00429
00430 runs.at(0) = maxRunBelow;
00431 runs.at(1) = maxRunAbove;
00432
00433 return runs;
00434 }
00435
00436
00437 std::vector<double> longestRunsChi2(
00438 const std::vector<double>& yMeasured,
00439 const std::vector<double>& yExpected, const std::vector<double>& sigma)
00440 {
00441
00442 double maxRunAbove, maxRunBelow, currRun;
00443 maxRunAbove = 0;
00444 maxRunBelow = 0;
00445 currRun = 0;
00446
00447 std::vector<double> runs(2, 0);
00448
00449
00450 if (yMeasured.size() != yExpected.size() || yMeasured.size() != sigma.size()
00451 || yExpected.size() != sigma.size()) {
00452
00453 return runs;
00454 }
00455
00456
00457
00458
00459 int N = yMeasured.size();
00460 if ( N<=0)
00461 return runs;
00462
00463
00464
00465
00466 double residue = (yMeasured.at(0) - yExpected.at(0)) / sigma.at(0);
00467 bool aboveRun = residue >= 0 ? true : false;
00468 currRun = residue * residue;
00469
00470
00471 for (int i = 1; i < N; i++) {
00472 residue = (yMeasured.at(i) - yExpected.at(i)) / sigma.at(i);
00473
00474 if ((residue >= 0) == aboveRun) {
00475 currRun += residue * residue;
00476 } else {
00477
00478 if (aboveRun)
00479 maxRunAbove = Max(maxRunAbove, currRun);
00480 else
00481 maxRunBelow = Max(maxRunBelow, currRun);
00482
00483 aboveRun = !aboveRun;
00484
00485 currRun = residue * residue;
00486 }
00487
00488
00489
00490
00491 }
00492
00493
00494
00495
00496
00497
00498 if (aboveRun)
00499 maxRunAbove = Max(maxRunAbove, currRun);
00500 else
00501 maxRunBelow = Max(maxRunBelow, currRun);
00502
00503
00504
00505
00506
00507 runs.at(0) = maxRunBelow;
00508 runs.at(1) = maxRunAbove;
00509
00510 return runs;
00511 }
00512
00513 double longestRunFrequency(unsigned longestObserved, unsigned int nTrials)
00514 {
00515
00516 if (longestObserved >= nTrials)
00517 return 0.;
00518
00519
00520 double prob = 0.;
00521
00522
00523 typedef unsigned int uint;
00524 uint Lobs = longestObserved;
00525 uint n = nTrials;
00526
00527
00528 double conditionalProb;
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00550
00555
00556
00558
00559
00560
00561
00562 double tempLog = 0;
00563 for (uint r = 0; r <= n; r++) {
00564 conditionalProb = 0.0;
00565
00566 for (uint i = 1; (i <= n - r + 1) && (i <= uint(r / double(Lobs + 1))); i++) {
00567 tempLog = ApproxLogFact(n - i * (Lobs + 1)) - ApproxLogFact(i)
00568 - ApproxLogFact(n - r - i + 1)
00569 - ApproxLogFact(r - i * (Lobs + 1));
00570 if (i % 2)
00571 conditionalProb += exp(tempLog);
00572 else
00573 conditionalProb -= exp(tempLog);
00574
00575 }
00576
00577 prob += (1 + n - r) * conditionalProb;
00578 }
00579
00580
00581 prob *= pow(2., -double(n));
00582
00583 return prob;
00584 }
00585
00586 double Rvalue(const std::vector<double> & chain_means, const std::vector<double> & chain_variances,
00587 const unsigned & chain_length, const bool & strict) throw (std::invalid_argument, std::domain_error)
00588 {
00589 if (chain_means.size() != chain_variances.size())
00590 throw std::invalid_argument("BCMath::RValue: chain means and chain variances are not aligned!");
00591
00592 const double n = chain_length;
00593 const double m = chain_means.size();
00594
00595 if (m <= 1)
00596 throw std::invalid_argument("BCMath:RValue: Need at least two chains to compute R-value!");
00597
00598
00599 double variance_of_means = 0.0;
00600 double mean_of_means = 0.0;
00601 double mean_of_variances = 0.0;
00602 double variance_of_variances = 0.0;
00603
00604
00605 double previous_mean_of_means = 0;
00606 double previous_mean_of_variances = 0;
00607 for (unsigned i = 0 ; i < m ; ++i)
00608 {
00609 if (0 == i)
00610 {
00611 mean_of_means = chain_means.front();
00612 variance_of_means = 0;
00613 mean_of_variances = chain_variances.front();
00614 variance_of_variances = 0;
00615
00616 continue;
00617 }
00618
00619
00620 previous_mean_of_means = mean_of_means;
00621 previous_mean_of_variances = mean_of_variances;
00622
00623
00624 mean_of_means += (chain_means[i] - previous_mean_of_means) / (i + 1.0);
00625 variance_of_means += (chain_means[i] - previous_mean_of_means) * (chain_means[i] - mean_of_means);
00626 mean_of_variances += (chain_variances[i] - previous_mean_of_variances) / (i + 1.0);
00627 variance_of_variances += (chain_variances[i] - previous_mean_of_variances) * (chain_variances[i] - mean_of_variances);
00628 }
00629
00630 variance_of_means /= m - 1.0;
00631 variance_of_variances /= m - 1.0;
00632
00633
00634 double B = variance_of_means * n;
00635 double W = mean_of_variances;
00636 double sigma_squared = (n - 1.0) / n * W + B / n;
00637
00638
00639 if (0.0 == W)
00640 {
00641 BCLog::OutDebug("BCMath::Rvalue: W = 0. Avoiding R = NaN.");
00642 return std::numeric_limits<double>::max();
00643 }
00644
00645
00646 if (!strict)
00647 return sqrt(sigma_squared / W);
00648
00649
00650 double R = 0.0;
00651
00652
00653 double covariance_22 = 0.0;
00654 double covariance_21 = 0.0;
00655
00656 for (unsigned i = 0 ; i < m ; ++i)
00657 {
00658 covariance_21 += (chain_variances[i] - mean_of_variances) * (chain_means.at(i) - mean_of_means);
00659 covariance_22 += (chain_variances[i] - mean_of_variances) * (chain_means[i] * chain_means[i] - mean_of_means * mean_of_means);
00660 }
00661
00662 covariance_21 /= m - 1.0;
00663 covariance_22 /= m - 1.0;
00664
00665
00666 double V = sigma_squared + B / (m * n);
00667
00668
00669 double a = (n - 1.0) * (n - 1.0) / (n * n * m) * variance_of_variances;
00670 double b = (m + 1) * (m + 1) / (m * n * m * n) * 2.0 / (m - 1) * B * B;
00671 double c = 2.0 * (m + 1.0) * (n - 1.0) / (m * n * n) * n / m * (covariance_22 - 2.0 * mean_of_means * covariance_21);
00672 double variance_of_V = a + b + c;
00673
00674
00675 double df = 2.0 * V * V / variance_of_V;
00676
00677 if (df <= 2)
00678 {
00679 BCLog::OutDebug(Form("BCMath::Rvalue: DoF (%g) below 2. Avoiding R = NaN.", df));
00680 return std::numeric_limits<double>::max();;
00681 }
00682
00683
00684 R = sqrt(V / W * df / (df - 2.0));
00685
00686
00687 if (R < 0.99 && n > 100)
00688 throw std::domain_error(Form("BCMath::Rvalue: %g < 0.99. Check for a bug in the implementation!", R));
00689
00690 return R;
00691 }
00692 }