• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BCHistogramFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TF1.h>
00012 #include <TGraph.h>
00013 #include <TString.h>
00014 #include <TPad.h>
00015 #include <TRandom3.h>
00016 #include <TLegend.h>
00017 #include <TMath.h>
00018 #include <Math/ProbFuncMathCore.h> //for ROOT::Math namespace
00019 #include <TString.h>
00020 
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 
00026 #include "BCHistogramFitter.h"
00027 
00028 // ---------------------------------------------------------
00029 
00030 BCHistogramFitter::BCHistogramFitter() :
00031    BCModel("HistogramFitter")
00032 {
00033    fHistogram = 0;
00034    fFitFunction = 0;
00035 
00036    // set default options and values
00037    MCMCSetNIterationsRun(2000);
00038    SetFillErrorBand();
00039    fFlagIntegration = true;
00040    flag_discrete = true;
00041 }
00042 
00043 // ---------------------------------------------------------
00044 
00045 BCHistogramFitter::BCHistogramFitter(TH1D * hist, TF1 * func) :
00046    BCModel("HistogramFitter")
00047 {
00048    fHistogram = 0;
00049    fFitFunction = 0;
00050 
00051    SetHistogram(hist);
00052    SetFitFunction(func);
00053 
00054    MCMCSetNIterationsRun(2000);
00055    SetFillErrorBand();
00056    fFlagIntegration = true;
00057    flag_discrete = true;
00058 
00059    fHistogramExpected = 0;
00060 }
00061 
00062 // ---------------------------------------------------------
00063 
00064 int BCHistogramFitter::SetHistogram(TH1D * hist)
00065 {
00066    // check if histogram exists
00067    if (!hist) {
00068       BCLog::Out(BCLog::error, BCLog::error,
00069             "BCHistogramFitter::SetHistogram() : TH1D not created.");
00070       return 0;
00071    }
00072 
00073    // set pointer to histogram
00074    fHistogram = hist;
00075 
00076    // create a data set. this is necessary in order to calculate the
00077    // error band. the data set contains as many data points as there
00078    // are bins.
00079    BCDataSet * ds = new BCDataSet();
00080 
00081    // create data points and add them to the data set.
00082    // the x value is the lower edge of the bin, and
00083    // the y value is the bin count
00084    int nbins = fHistogram->GetNbinsX();
00085    for (int i = 0; i < nbins; ++i) {
00086       BCDataPoint* dp = new BCDataPoint(2);
00087       dp ->SetValue(0, fHistogram->GetBinLowEdge(i+1));
00088       dp ->SetValue(1, fHistogram->GetBinContent(i+1));
00089       ds->AddDataPoint(dp);
00090    }
00091 
00092    // set the new data set.
00093    SetDataSet(ds);
00094 
00095    // calculate the lower and upper edge in x.
00096    double xmin = hist->GetBinLowEdge(1);
00097    double xmax = hist->GetBinLowEdge(nbins + 1);
00098 
00099    // calculate the minimum and maximum range in y.
00100    double histymin = hist->GetMinimum();
00101    double histymax = hist->GetMaximum();
00102 
00103    // calculate the minimum and maximum of the function value based on
00104    // the minimum and maximum value in y.
00105    double ymin = TMath::Max(0., histymin - 5. * sqrt(histymin));
00106    double ymax = histymax + 5. * sqrt(histymax);
00107 
00108    // set the data boundaries for x and y values.
00109    SetDataBoundaries(0, xmin, xmax);
00110    SetDataBoundaries(1, ymin, ymax);
00111 
00112    // set the indeces for fitting.
00113    SetFitFunctionIndices(0, 1);
00114 
00115    // no error
00116    return 1;
00117 }
00118 
00119 // ---------------------------------------------------------
00120 
00121 int BCHistogramFitter::SetHistogramExpected(const std::vector <double>& parameters)
00122 {
00123    //clear up previous memory
00124    if(fHistogramExpected){
00125       delete fHistogramExpected;
00126    }
00127    //copy all properties from the data histogram
00128    fHistogramExpected = new TH1D(*fHistogram);
00129 
00130    // get the number of bins
00131    int nBins = fHistogramExpected->GetNbinsX();
00132 
00133    // get bin width
00134    double dx = fHistogramExpected->GetBinWidth(1);
00135 
00136    //set the parameters of fit function
00137    fFitFunction->SetParameters(&parameters[0]);
00138 
00139    // get function value of lower bin edge
00140    double fedgelow = fFitFunction->Eval(fHistogramExpected->GetBinLowEdge(1));
00141 
00142    // loop over all bins, skip underflow
00143    for (int ibin = 1; ibin <= nBins; ++ibin) {
00144       // get upper bin edge
00145       double xedgehi = fHistogramExpected->GetBinLowEdge(ibin) + dx;
00146 
00147       //expected count
00148       double yexp = 0.;
00149 
00150       // use ROOT's TH1D::Integral method
00151       if (fFlagIntegration)
00152          yexp = fFitFunction->Integral(xedgehi - dx, xedgehi);
00153 
00154       // use linear interpolation
00155       else {
00156          // get function value at upper bin edge
00157          double fedgehi = fFitFunction->Eval(xedgehi);
00158          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00159 
00160          // make the upper edge the lower edge for the next iteration
00161          fedgelow = fedgehi;
00162       }
00163 
00164       //write the expectation for the bin
00165       fHistogramExpected->SetBinContent(ibin, yexp);
00166 
00167       //avoid automatic error as sqrt(yexp), used e.g. in Kolmogorov correction factor
00168       fHistogramExpected->SetBinError(ibin, 0.0);
00169 
00170       // but the data under this model have that sqrt(yexp) uncertainty
00171       fHistogram->SetBinError(ibin, sqrt(yexp));
00172 
00173 
00174    }
00175    return 1;
00176 }
00177 
00178 // ---------------------------------------------------------
00179 
00180 int BCHistogramFitter::SetFitFunction(TF1 * func)
00181 {
00182    // check if function exists
00183    if (!func) {
00184       BCLog::Out(BCLog::error, BCLog::error,
00185             "BCHistogramFitter::SetFitFunction() : TF1 not created.");
00186       return 0;
00187    }
00188 
00189    // set the function
00190    fFitFunction = func;
00191 
00192    // update the model name to contain the function name
00193    SetName(TString::Format("HistogramFitter with %s",fFitFunction->GetName()));
00194 
00195    // reset parameters
00196    fParameterSet->clear();
00197 
00198    // get the new number of parameters
00199    int n = func->GetNpar();
00200 
00201    // add parameters
00202    for (int i = 0; i < n; ++i) {
00203       double xmin;
00204       double xmax;
00205 
00206       func->GetParLimits(i, xmin, xmax);
00207 
00208       AddParameter(func->GetParName(i), xmin, xmax);
00209    }
00210 
00211    // set flat prior for all parameters by default
00212    SetPriorConstantAll();
00213 
00214    return GetNParameters();
00215 }
00216 
00217 // ---------------------------------------------------------
00218 
00219 BCHistogramFitter::~BCHistogramFitter()
00220 {
00221    delete fHistogramExpected;
00222 }
00223 
00224 // ---------------------------------------------------------
00225 
00226 /*
00227 double BCHistogramFitter::LogAPrioriProbability(std::vector<double> parameters)
00228 {
00229    // using flat probability in all parameters
00230    double logprob = 0.;
00231    for (unsigned int i = 0; i < GetNParameters(); i++)
00232       logprob -= log(GetParameter(i)->GetRangeWidth());
00233 
00234    return logprob;
00235 }
00236 */
00237 
00238 // ---------------------------------------------------------
00239 
00240 double BCHistogramFitter::LogLikelihood(std::vector<double> params)
00241 {
00242    // initialize probability
00243    double loglikelihood = 0;
00244 
00245    // set the parameters of the function
00246    fFitFunction->SetParameters(&params[0]);
00247 
00248    // get the number of bins
00249    int nbins = fHistogram->GetNbinsX();
00250 
00251    // get bin width
00252    double dx = fHistogram->GetBinWidth(1);
00253 
00254    // get function value of lower bin edge
00255    double fedgelow = fFitFunction->Eval(fHistogram->GetBinLowEdge(1));
00256 
00257    // loop over all bins
00258    for (int ibin = 1; ibin <= nbins; ++ibin) {
00259       // get upper bin edge
00260       double xedgehi = fHistogram->GetBinLowEdge(ibin) + dx;
00261 
00262       // get function value at upper bin edge
00263       double fedgehi = fFitFunction->Eval(xedgehi);
00264 
00265       // get the number of observed events
00266       double y = fHistogram->GetBinContent(ibin);
00267 
00268       double yexp = 0.;
00269 
00270       // use ROOT's TH1D::Integral method
00271       if (fFlagIntegration)
00272          yexp = fFitFunction->Integral(xedgehi - dx, xedgehi);
00273 
00274       // use linear interpolation
00275       else {
00276          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00277 
00278          // make the upper edge the lower edge for the next iteration
00279          fedgelow = fedgehi;
00280       }
00281 
00282       // get the value of the Poisson distribution
00283       loglikelihood += BCMath::LogPoisson(y, yexp);
00284    }
00285 
00286    // // get bin boundaries
00287    // double xmin = fHistogram->GetBinLowEdge(ibin);
00288    // double xmax = fHistogram->GetBinLowEdge(ibin+1);
00289 
00290    // // get the number of observed events
00291    // double y = fHistogram->GetBinContent(ibin);
00292 
00293    // // get the number of expected events
00294    // double yexp = fFitFunction->Integral(xmin, xmax);
00295 
00296    // // get the value of the Poisson distribution
00297    // loglikelihood += BCMath::LogPoisson(y, yexp);
00298 
00299    return loglikelihood;
00300 }
00301 
00302 // ---------------------------------------------------------
00303 
00304 double BCHistogramFitter::FitFunction(std::vector<double> x, std::vector<double> params)
00305 {
00306    // set the parameters of the function
00307    fFitFunction->SetParameters(&params[0]);
00308 
00309    return fFitFunction->Eval(x[0]) * fHistogram->GetBinWidth(fHistogram->FindBin(x[0]));
00310 }
00311 
00312 // ---------------------------------------------------------
00313 
00314 int BCHistogramFitter::Fit(TH1D * hist, TF1 * func)
00315 {
00316    // set histogram
00317    if (hist)
00318       SetHistogram(hist);
00319    else {
00320       BCLog::OutError("BCHistogramFitter::Fit : Histogram not defined.");
00321       return 0;
00322    }
00323 
00324    // set function
00325    if (func)
00326       SetFitFunction(func);
00327    else {
00328       BCLog::OutError("BCHistogramFitter::Fit : Fit function not defined.");
00329       return 0;
00330    }
00331 
00332    return Fit();
00333 }
00334 
00335 // ---------------------------------------------------------
00336 
00337 int BCHistogramFitter::Fit()
00338 {
00339    // set histogram
00340    if (!fHistogram) {
00341       BCLog::OutError("BCHistogramFitter::Fit : Histogram not defined.");
00342       return 0;
00343    }
00344 
00345    // set function
00346    if (!fFitFunction) {
00347       BCLog::OutError("BCHistogramFitter::Fit : Fit function not defined.");
00348       return 0;
00349    }
00350 
00351    // perform marginalization
00352    MarginalizeAll();
00353 
00354    // maximize posterior probability, using the best-fit values close
00355    // to the global maximum from the MCMC
00356    FindModeMinuit(GetBestFitParameters(), -1);
00357 
00358    // calculate the p-value using the fast MCMC algorithm
00359    double pvalue;
00360    if (CalculatePValueFast(GetBestFitParameters(), pvalue))
00361       fPValue = pvalue;
00362    else
00363       BCLog::OutWarning("BCHistogramFitter::Fit : Could not use the fast p-value evaluation.");
00364 
00365    // print summary to screen
00366    PrintShortFitSummary();
00367 
00368    // no error
00369    return 1;
00370 }
00371 
00372 // ---------------------------------------------------------
00373 
00374 void BCHistogramFitter::DrawFit(const char * options, bool flaglegend)
00375 {
00376    if (!fHistogram) {
00377       BCLog::OutError("BCHistogramFitter::DrawFit : Histogram not defined.");
00378       return;
00379    }
00380 
00381    if (!fFitFunction) {
00382       BCLog::OutError("BCHistogramFitter::DrawFit : Fit function not defined.");
00383       return;
00384    }
00385 
00386    if (!fErrorBandXY || fBestFitParameters.empty() ) {
00387       BCLog::OutError("BCHistogramFitter::DrawFit : Fit not performed yet.");
00388       return;
00389    }
00390 
00391    // check wheather options contain "same"
00392    TString opt = options;
00393    opt.ToLower();
00394 
00395    // if not same, draw the histogram first to get the axes
00396    if (!opt.Contains("same"))
00397       fHistogram->Draw(opt.Data());
00398 
00399    // draw the error band as central 68% probability interval
00400    fErrorBand = GetErrorBandGraph(0.16, 0.84);
00401    fErrorBand->Draw("f same");
00402 
00403    // now draw the histogram again since it was covered by the band
00404    fHistogram->Draw(TString::Format("%ssame", opt.Data()));
00405 
00406    // draw the fit function on top
00407    fGraphFitFunction
00408          = GetFitFunctionGraph(GetBestFitParameters());
00409    fGraphFitFunction->SetLineColor(kRed);
00410    fGraphFitFunction->SetLineWidth(2);
00411    fGraphFitFunction->Draw("l same");
00412 
00413    // draw legend
00414    if (flaglegend) {
00415       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00416       legend->SetBorderSize(0);
00417       legend->SetFillColor(kWhite);
00418       legend->AddEntry(fHistogram, "Data", "L");
00419       legend->AddEntry(fGraphFitFunction, "Best fit", "L");
00420       legend->AddEntry(fErrorBand, "Error band", "F");
00421       legend->Draw();
00422    }
00423 
00424    gPad->RedrawAxis();
00425 }
00426 
00427 // ---------------------------------------------------------
00428 
00429 int BCHistogramFitter::CalculatePValueFast(std::vector<double> par, double &pvalue, int nIterations)
00430 {
00431    // check size of parameter vector
00432    if (par.size() != GetNParameters()) {
00433       BCLog::OutError("BCHistogramFitter::CalculatePValueFast : Number of parameters is inconsistent.");
00434       return 0;
00435    }
00436 
00437    // check if histogram exists
00438    if (!fHistogram) {
00439       BCLog::OutError("BCHistogramFitter::CalculatePValueFast : Histogram not defined.");
00440       return 0;
00441    }
00442 
00443    // define temporary variables
00444    int nbins = fHistogram->GetNbinsX();
00445 
00446    std::vector<int> histogram;
00447    std::vector<double> expectation;
00448    histogram.assign(nbins, 0);
00449    expectation.assign(nbins, 0);
00450 
00451    double logp = 0;
00452    double logp_start = 0;
00453    int counter_pvalue = 0;
00454 
00455    //update the expected number of events for all bins
00456    SetHistogramExpected(par);
00457 
00458    // define starting distribution as histogram with most likely entries
00459    for (int ibin = 0; ibin < nbins; ++ibin) {
00460 
00461      // get the number of expected events
00462      double yexp = fHistogramExpected->GetBinContent(ibin +1);
00463 
00464      //most likely observed value = int(expected value)
00465       histogram[ibin] = int(yexp);
00466       expectation[ibin] = yexp;
00467 
00468       // calculate test statistic (= likelihood of entire histogram) for starting distr.
00469       logp += BCMath::LogPoisson(double(int(yexp)), yexp);
00470       //statistic of the observed data set
00471       logp_start += BCMath::LogPoisson(fHistogram->GetBinContent(ibin + 1),
00472             yexp);
00473    }
00474 
00475    // loop over iterations
00476    for (int iiter = 0; iiter < nIterations; ++iiter) {
00477       // loop over bins
00478       for (int ibin = 0; ibin < nbins; ++ibin) {
00479          // random step up or down in statistics for this bin
00480          double ptest = fRandom->Rndm() - 0.5;
00481 
00482          // increase statistics by 1
00483          if (ptest > 0) {
00484             // calculate factor of probability
00485             double r = expectation.at(ibin) / double(histogram.at(ibin) + 1);
00486 
00487             // walk, or don't (this is the Metropolis part)
00488             if (fRandom->Rndm() < r) {
00489                histogram[ibin] = histogram.at(ibin) + 1;
00490                logp += log(r);
00491             }
00492          }
00493 
00494          // decrease statistics by 1
00495          else if (ptest <= 0 && histogram[ibin] > 0) {
00496             // calculate factor of probability
00497             double r = double(histogram.at(ibin)) / expectation.at(ibin);
00498 
00499             // walk, or don't (this is the Metropolis part)
00500             if (fRandom->Rndm() < r) {
00501                histogram[ibin] = histogram.at(ibin) - 1;
00502                logp += log(r);
00503             }
00504          }
00505       } // end of looping over bins
00506 
00507       // increase counter
00508       if (logp < logp_start)
00509          counter_pvalue++;
00510 
00511    } // end of looping over iterations
00512 
00513    // calculate p-value
00514    pvalue = double(counter_pvalue) / double(nIterations);
00515 
00516    // no error
00517    return 1;
00518 }
00519 
00520 // ---------------------------------------------------------
00521 
00522 int BCHistogramFitter::CalculatePValueLikelihood(std::vector<double> par,
00523       double &pvalue)
00524 {
00525    // initialize test statistic -2*lambda
00526    double logLambda = 0.0;
00527 
00528    //Calculate expected counts to compare with
00529    SetHistogramExpected(par);
00530 
00531    for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
00532 
00533       // get the number of observed events
00534       double y = fHistogram->GetBinContent(ibin);
00535 
00536       // get the number of expected events
00537       double yexp = fHistogramExpected->GetBinContent(ibin);
00538 
00539       // get the contribution from this datapoint
00540       if (y == 0)
00541          logLambda += yexp;
00542       else
00543          logLambda += yexp - y + y * log(y / yexp);
00544    }
00545 
00546    // rescale
00547    logLambda *= 2.0;
00548 
00549    // compute degrees of freedom for Poisson case
00550    int nDoF = GetNDataPoints() - GetNParameters();
00551 
00552    //p value from chi^2 distribution, returns zero if logLambda < 0
00553    fPValueNDoF = TMath::Prob(logLambda, nDoF);
00554    pvalue = fPValueNDoF;
00555 
00556    // no error
00557    return 1;
00558 }
00559 
00560 // ---------------------------------------------------------
00561 
00562 int BCHistogramFitter::CalculatePValueLeastSquares(std::vector<double> par, double &pvalue, bool weightExpect)
00563 {
00564    // initialize test statistic chi^2
00565    double chi2 = 0.0;
00566 
00567    //Calculate expected counts to compare with
00568    SetHistogramExpected(par);
00569 
00570    for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
00571 
00572       // get the number of observed events
00573       double y = fHistogram->GetBinContent(ibin);
00574 
00575       // get the number of expected events
00576       double yexp = fHistogramExpected->GetBinContent(ibin);
00577 
00578       //convert 1/0.0 into 1 for weighted sum
00579       double weight;
00580       if (weightExpect)
00581          weight = (yexp > 0) ? yexp : 1.0;
00582       else
00583          weight = (y > 0) ? y : 1.0;
00584 
00585       // get the contribution from this datapoint
00586       chi2 += (y - yexp) * (y - yexp) / weight;
00587    }
00588 
00589    // compute degrees of freedom for Poisson case
00590    int nDoF = GetNDataPoints() - GetNParameters();
00591 
00592    // p value from chi^2 distribution, returns zero if logLambda < 0
00593    fPValueNDoF = TMath::Prob(chi2, nDoF);
00594    pvalue = fPValueNDoF;
00595 
00596    // no error
00597    return 1;
00598 }
00599 
00600 // ---------------------------------------------------------
00601 
00602 int BCHistogramFitter::CalculatePValueKolmogorov(std::vector<double> par, double& pvalue)
00603 {
00604    if (!fHistogramExpected || !fHistogram) {
00605       BCLog::OutError("BCHistogramFitter::CalculatePValueKolmogorov: "
00606             "Please define the reference distribution by calling \n"
00607             "BCHistogramFitter::SetHistogramExpected() first!");
00608       return 0;
00609    }
00610 
00611    //update expected counts with current parameters
00612    SetHistogramExpected(par);
00613 
00614    //perform the test
00615    pvalue = fHistogramExpected->KolmogorovTest(fHistogram);
00616 
00617    fPValue = pvalue;
00618 
00619    // no error
00620    return 1;
00621 }
00622 
00623 // ---------------------------------------------------------
00624 
00625 double BCHistogramFitter::CDF(const std::vector<double>& parameters, int index, bool lower)
00626 {
00627 
00628    if (parameters.empty())
00629       BCLog::OutWarning("BCHistogramFitter::CDF: parameter vector empty!");
00630    //histogram stores underflow in bin 0, so datapoint 0 is in bin 1
00631    index++;
00632 
00633    // get the number of observed events (should be integer)
00634    double yObs = fHistogram->GetBinContent(index);
00635 
00636    // if(double( (unsigned int)yObs)==yObs)
00637    //    BCLog::OutWarning(Form(
00638    //          "BCHistogramFitter::CDF: using bin count %f that  is not an integer!",yObs));
00639 
00640    // get function value of lower bin edge
00641    double edgeLow = fHistogram->GetBinLowEdge(index);
00642    double edgeHigh = edgeLow + fHistogram->GetBinWidth(index);
00643 
00644    // expectation value of this bin
00645    double yExp = 0.0;
00646 
00647    // use ROOT's TH1D::Integral method
00648    if (fFlagIntegration)
00649       yExp = fFitFunction->Integral(edgeLow, edgeHigh, &parameters[0]);
00650 
00651    // use linear interpolation
00652    else {
00653       double dx = fHistogram->GetBinWidth(index);
00654       double fEdgeLow = fFitFunction->Eval(edgeLow);
00655       double fEdgeHigh = fFitFunction->Eval(edgeHigh);
00656       yExp = fEdgeLow * dx + (fEdgeHigh - fEdgeLow) * dx / 2.;
00657    }
00658 
00659    // usually Poisson bins do not agree with fixed probability bins
00660    // so choose where it should belong
00661 
00662    if (lower) {
00663       if ((int) yObs >= 1)
00664          return ROOT::Math::poisson_cdf((unsigned int) (yObs - 1), yExp);
00665       else
00666          return 0.0;
00667    }
00668    // what if yObs as double doesn't reprepsent a whole number? exceptioN?
00669    if ((double) (unsigned int) yObs != yObs){
00670       BCLog::OutWarning(Form(
00671             "BCHistogramFitter::CDF: Histogram values should be integer!\n"
00672                " Bin %d = %f", index, yObs));
00673 
00674       //convert randomly to integer
00675       // ex. yObs = 9.785 =>
00676 //      yObs --> 10 with P = 78.5%
00677 //      yObs --> 9  with P = 21.5%
00678       double U = fRandom->Rndm() ;
00679       double yObsLower = (unsigned int)yObs;
00680       if( U > (yObs - yObsLower) )
00681          yObs = yObsLower;
00682       else
00683          yObs = yObsLower + 1;
00684    }
00685 
00686 // BCLog::OutDebug(Form("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00687 // BCLog::Out(TString::Format("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00688 
00689    return ROOT::Math::poisson_cdf((unsigned int)yObs, yExp);
00690 }
00691 
00692 // ---------------------------------------------------------

Generated on Fri Nov 19 2010 17:02:52 for Bayesian Analysis Toolkit by  doxygen 1.7.1