BCHistogramFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TF1.h>
00012 #include <TGraph.h>
00013 #include <TString.h>
00014 #include <TPad.h>
00015 #include <TRandom3.h>
00016 #include <TLegend.h>
00017 #include <TMath.h>
00018 #include <Math/ProbFuncMathCore.h> //for ROOT::Math namespace
00019 #include <TString.h>
00020 
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 
00026 #include "BAT/BCHistogramFitter.h"
00027 
00028 // ---------------------------------------------------------
00029 
00030 BCHistogramFitter::BCHistogramFitter() :
00031    BCModel("HistogramFitter")
00032 {
00033    fHistogram = 0;
00034    fFitFunction = 0;
00035 
00036    // set default options and values
00037    this -> MCMCSetNIterationsRun(2000);
00038    this -> SetFillErrorBand();
00039    fFlagIntegration = true;
00040    flag_discrete = true;
00041 }
00042 
00043 // ---------------------------------------------------------
00044 
00045 BCHistogramFitter::BCHistogramFitter(TH1D * hist, TF1 * func) :
00046    BCModel("HistogramFitter")
00047 {
00048    fHistogram = 0;
00049    fFitFunction = 0;
00050 
00051    this -> SetHistogram(hist);
00052    this -> SetFitFunction(func);
00053 
00054    this -> MCMCSetNIterationsRun(2000);
00055    this -> SetFillErrorBand();
00056    fFlagIntegration = true;
00057    flag_discrete = true;
00058 }
00059 
00060 // ---------------------------------------------------------
00061 
00062 int BCHistogramFitter::SetHistogram(TH1D * hist)
00063 {
00064    // check if histogram exists
00065    if (!hist) {
00066       BCLog::Out(BCLog::error, BCLog::error,
00067             "BCHistogramFitter::SetHistogram() : TH1D not created.");
00068       return 0;
00069    }
00070 
00071    // set pointer to histogram
00072    fHistogram = hist;
00073 
00074    // create a data set. this is necessary in order to calculate the
00075    // error band. the data set contains as many data points as there
00076    // are bins. for now, the data points are empty.
00077    BCDataSet * ds = new BCDataSet();
00078 
00079    // create data points and add them to the data set.
00080    int nbins = fHistogram -> GetNbinsX();
00081    for (int i = 0; i < nbins; ++i) {
00082       BCDataPoint* dp = new BCDataPoint(2);
00083       ds -> AddDataPoint(dp);
00084    }
00085 
00086    // set the new data set.
00087    this -> SetDataSet(ds);
00088 
00089    // calculate the lower and upper edge in x.
00090    double xmin = hist -> GetBinLowEdge(1);
00091    double xmax = hist -> GetBinLowEdge(nbins + 1);
00092 
00093    // calculate the minimum and maximum range in y.
00094    double histymin = hist -> GetMinimum();
00095    double histymax = hist -> GetMaximum();
00096 
00097    // calculate the minimum and maximum of the function value based on
00098    // the minimum and maximum value in y.
00099    double ymin = TMath::Max(0., histymin - 5. * sqrt(histymin));
00100    double ymax = histymax + 5. * sqrt(histymax);
00101 
00102    // set the data boundaries for x and y values.
00103    this -> SetDataBoundaries(0, xmin, xmax);
00104    this -> SetDataBoundaries(1, ymin, ymax);
00105 
00106    // set the indeces for fitting.
00107    this -> SetFitFunctionIndices(0, 1);
00108 
00109    // no error
00110    return 1;
00111 }
00112 
00113 // ---------------------------------------------------------
00114 
00115 int BCHistogramFitter::SetFitFunction(TF1 * func)
00116 {
00117    // check if function exists
00118    if (!func) {
00119       BCLog::Out(BCLog::error, BCLog::error,
00120             "BCHistogramFitter::SetFitFunction() : TF1 not created.");
00121       return 0;
00122    }
00123 
00124    // set the function
00125    fFitFunction = func;
00126 
00127    // update the model name to contain the function name
00128    this -> SetName(TString::Format("HistogramFitter with %s",
00129          fFitFunction->GetName()));
00130 
00131    // reset parameters
00132    fParameterSet -> clear();
00133 
00134    // get the new number of parameters
00135    int n = func -> GetNpar();
00136 
00137    // add parameters
00138    for (int i = 0; i < n; ++i) {
00139       double xmin;
00140       double xmax;
00141 
00142       func -> GetParLimits(i, xmin, xmax);
00143 
00144       this -> AddParameter(func->GetParName(i), xmin, xmax);
00145    }
00146 
00147    // no error
00148    return 1;
00149 }
00150 
00151 // ---------------------------------------------------------
00152 
00153 BCHistogramFitter::~BCHistogramFitter()
00154 {
00155 }
00156 
00157 // ---------------------------------------------------------
00158 
00159 double BCHistogramFitter::LogAPrioriProbability(std::vector<double> parameters)
00160 {
00161    // using flat probability in all parameters
00162    double logprob = 0.;
00163    for (unsigned int i = 0; i < this -> GetNParameters(); i++)
00164       logprob -= log(this -> GetParameter(i) -> GetRangeWidth());
00165 
00166    return logprob;
00167 }
00168 
00169 // ---------------------------------------------------------
00170 
00171 double BCHistogramFitter::LogLikelihood(std::vector<double> params)
00172 {
00173    // initialize probability
00174    double loglikelihood = 0;
00175 
00176    // set the parameters of the function
00177    fFitFunction -> SetParameters(&params[0]);
00178 
00179    // get the number of bins
00180    int nbins = fHistogram -> GetNbinsX();
00181 
00182    // get bin width
00183    double dx = fHistogram -> GetBinWidth(1);
00184 
00185    // get function value of lower bin edge
00186    double fedgelow = fFitFunction -> Eval(fHistogram -> GetBinLowEdge(1));
00187 
00188    // loop over all bins
00189    for (int ibin = 1; ibin <= nbins; ++ibin) {
00190       // get upper bin edge
00191       double xedgehi = fHistogram -> GetBinLowEdge(ibin) + dx;
00192 
00193       // get function value at upper bin edge
00194       double fedgehi = fFitFunction -> Eval(xedgehi);
00195 
00196       // get the number of observed events
00197       double y = fHistogram -> GetBinContent(ibin);
00198 
00199       double yexp = 0.;
00200 
00201       // use ROOT's TH1D::Integral method
00202       if (fFlagIntegration)
00203          yexp = fFitFunction -> Integral(xedgehi - dx, xedgehi);
00204 
00205       // use linear interpolation
00206       else {
00207          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00208 
00209          // make the upper edge the lower edge for the next iteration
00210          fedgelow = fedgehi;
00211       }
00212 
00213       // get the value of the Poisson distribution
00214       loglikelihood += BCMath::LogPoisson(y, yexp);
00215    }
00216 
00217    // // get bin boundaries
00218    // double xmin = fHistogram -> GetBinLowEdge(ibin);
00219    // double xmax = fHistogram -> GetBinLowEdge(ibin+1);
00220 
00221    // // get the number of observed events
00222    // double y = fHistogram -> GetBinContent(ibin);
00223 
00224    // // get the number of expected events
00225    // double yexp = fFitFunction -> Integral(xmin, xmax);
00226 
00227    // // get the value of the Poisson distribution
00228    // loglikelihood += BCMath::LogPoisson(y, yexp);
00229 
00230    return loglikelihood;
00231 }
00232 
00233 // ---------------------------------------------------------
00234 
00235 double BCHistogramFitter::FitFunction(std::vector<double> x,
00236       std::vector<double> params)
00237 {
00238    // set the parameters of the function
00239    fFitFunction -> SetParameters(&params[0]);
00240 
00241    return fFitFunction -> Eval(x[0]) * fHistogram -> GetBinWidth(
00242          fHistogram -> FindBin(x[0]));
00243 }
00244 
00245 // ---------------------------------------------------------
00246 
00247 int BCHistogramFitter::Fit(TH1D * hist, TF1 * func)
00248 {
00249    // set histogram
00250    if (hist)
00251       this -> SetHistogram(hist);
00252    else {
00253       BCLog::Out(BCLog::warning, BCLog::warning,
00254             "BCHistogramFitter::Fit() : Histogram not defined.");
00255       return 0;
00256    }
00257 
00258    // set function
00259    if (func)
00260       this -> SetFitFunction(func);
00261    else {
00262       BCLog::Out(BCLog::warning, BCLog::warning,
00263             "BCHistogramFitter::Fit() : Fit function not defined.");
00264       return 0;
00265    }
00266 
00267    // perform marginalization
00268    this -> MarginalizeAll();
00269 
00270    // maximize posterior probability, using the best-fit values close
00271    // to the global maximum from the MCMC
00272    this -> FindModeMinuit(this -> GetBestFitParameters(), -1);
00273 
00274    // calculate the p-value using the fast MCMC algorithm
00275    double pvalue;
00276    if (this -> CalculatePValueFast(this -> GetBestFitParameters(), pvalue))
00277       fPValue = pvalue;
00278    else
00279       BCLog::Out(BCLog::warning, BCLog::warning,
00280             "BCHistogramFitter::Fit() : Could not use the fast p-value evaluation.");
00281 
00282    // print summary to screen
00283    this -> PrintShortFitSummary();
00284 
00285    // no error
00286    return 1;
00287 }
00288 
00289 // ---------------------------------------------------------
00290 
00291 void BCHistogramFitter::DrawFit(const char * options, bool flaglegend)
00292 {
00293    if (!fHistogram) {
00294       BCLog::Out(BCLog::warning, BCLog::warning,
00295             "BCHistogramFitter::DrawFit() : Histogram not defined.");
00296       return;
00297    }
00298 
00299    if (!fFitFunction) {
00300       BCLog::Out(BCLog::warning, BCLog::warning,
00301             "BCHistogramFitter::DrawFit() : Fit function not defined.");
00302       return;
00303    }
00304 
00305    // check wheather options contain "same"
00306    TString opt = options;
00307    opt.ToLower();
00308 
00309    // if not same, draw the histogram first to get the axes
00310    if (!opt.Contains("same"))
00311       fHistogram -> Draw(opt.Data());
00312 
00313    // draw the error band as central 68% probability interval
00314    fErrorBand = this -> GetErrorBandGraph(0.16, 0.84);
00315    fErrorBand -> Draw("f same");
00316 
00317    // now draw the histogram again since it was covered by the band
00318    fHistogram -> Draw(TString::Format("%ssame", opt.Data()));
00319 
00320    // draw the fit function on top
00321    fGraphFitFunction
00322          = this -> GetFitFunctionGraph(this->GetBestFitParameters());
00323    fGraphFitFunction -> SetLineColor(kRed);
00324    fGraphFitFunction -> SetLineWidth(2);
00325    fGraphFitFunction -> Draw("l same");
00326 
00327    // draw legend
00328    if (flaglegend) {
00329       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00330       legend -> SetBorderSize(0);
00331       legend -> SetFillColor(kWhite);
00332       legend -> AddEntry(fHistogram, "Data", "L");
00333       legend -> AddEntry(fGraphFitFunction, "Best fit", "L");
00334       legend -> AddEntry(fErrorBand, "Error band", "F");
00335       legend -> Draw();
00336    }
00337 
00338    gPad -> RedrawAxis();
00339 }
00340 
00341 // ---------------------------------------------------------
00342 int BCHistogramFitter::CalculatePValueFast(std::vector<double> par,
00343       double &pvalue)
00344 {
00345    // check size of parameter vector
00346    if (par.size() != this -> GetNParameters()) {
00347       BCLog::Out(
00348             BCLog::warning,
00349             BCLog::warning,
00350             "BCHistogramFitter::CalculatePValueFast() : Number of parameters is inconsistent.");
00351       return 0;
00352    }
00353 
00354    // check if histogram exists
00355    if (!fHistogram) {
00356       BCLog::Out(BCLog::warning, BCLog::warning,
00357             "BCHistogramFitter::CalculatePValueFast() : Histogram not defined.");
00358       return 0;
00359    }
00360 
00361    // define temporary variables
00362    int nbins = fHistogram -> GetNbinsX();
00363 
00364    std::vector<int> histogram;
00365    std::vector<double> expectation;
00366    histogram.assign(nbins, 0);
00367    expectation.assign(nbins, 0);
00368 
00369    double logp = 0;
00370    double logp_start = 0;
00371    int counter_pvalue = 0;
00372 
00373    // define starting distribution
00374    for (int ibin = 0; ibin < nbins; ++ibin) {
00375       // get bin boundaries
00376       double xmin = fHistogram -> GetBinLowEdge(ibin + 1);
00377       double xmax = fHistogram -> GetBinLowEdge(ibin + 2);
00378 
00379       // get the number of expected events
00380       double yexp = fFitFunction -> Integral(xmin, xmax);
00381 
00382       histogram[ibin] = int(yexp);
00383       expectation[ibin] = yexp;
00384 
00385       // calculate p;
00386       logp += BCMath::LogPoisson(double(int(yexp)), yexp);
00387       logp_start += BCMath::LogPoisson(fHistogram -> GetBinContent(ibin + 1),
00388             yexp);
00389    }
00390 
00391    int niter = 100000;
00392 
00393    // loop over iterations
00394    for (int iiter = 0; iiter < niter; ++iiter) {
00395       // loop over bins
00396       for (int ibin = 0; ibin < nbins; ++ibin) {
00397          // random step up or down in statistics for this bin
00398          double ptest = fRandom -> Rndm() - 0.5;
00399 
00400          // increase statistics by 1
00401          if (ptest > 0) {
00402             // calculate factor of probability
00403             double r = expectation.at(ibin) / double(histogram.at(ibin) + 1);
00404 
00405             // walk, or don't (this is the Metropolis part)
00406             if (fRandom -> Rndm() < r) {
00407                histogram[ibin] = histogram.at(ibin) + 1;
00408                logp += log(r);
00409             }
00410          }
00411 
00412          // decrease statistics by 1
00413          else if (ptest <= 0 && histogram[ibin] > 0) {
00414             // calculate factor of probability
00415             double r = double(histogram.at(ibin)) / expectation.at(ibin);
00416 
00417             // walk, or don't (this is the Metropolis part)
00418             if (fRandom -> Rndm() < r) {
00419                histogram[ibin] = histogram.at(ibin) - 1;
00420                logp += log(r);
00421             }
00422          }
00423          // debugKK
00424          //   std::cout << " negative " << std::endl;
00425 
00426       } // end of looping over bins
00427 
00428       // increase counter
00429       if (logp < logp_start)
00430          counter_pvalue++;
00431 
00432    } // end of looping over iterations
00433 
00434    // calculate p-value
00435    pvalue = double(counter_pvalue) / double(niter);
00436 
00437    // no error
00438    return 1;
00439 }
00440 
00441 int BCHistogramFitter::CalculatePValueLikelihood(std::vector<double> par,
00442       double &pvalue)
00443 {
00444    // initialize test statistic -2*lambda
00445    double logLambda = 0.0;
00446 
00447    // set the parameters of the function
00448    fFitFunction -> SetParameters(&par[0]);
00449 
00450    // get the number of bins
00451    int nbins = fHistogram -> GetNbinsX();
00452 
00453    // get bin width
00454    double dx = fHistogram -> GetBinWidth(1);
00455 
00456    // get function value of lower bin edge
00457    double fedgelow = fFitFunction -> Eval(fHistogram -> GetBinLowEdge(1));
00458 
00459    // loop over all bins, skip underflow
00460    for (int ibin = 1; ibin <= nbins; ++ibin) {
00461       // get upper bin edge
00462       double xedgehi = fHistogram -> GetBinLowEdge(ibin) + dx;
00463 
00464       // get function value at upper bin edge
00465       double fedgehi = fFitFunction -> Eval(xedgehi);
00466 
00467       // get the number of observed events
00468       double y = fHistogram -> GetBinContent(ibin);
00469 
00470       double yexp = 0.;
00471 
00472       // use ROOT's TH1D::Integral method
00473       if (fFlagIntegration)
00474          yexp = fFitFunction -> Integral(xedgehi - dx, xedgehi);
00475 
00476       // use linear interpolation
00477       else {
00478          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00479 
00480          // make the upper edge the lower edge for the next iteration
00481          fedgelow = fedgehi;
00482       }
00483 
00484       // get the contribution from this datapoint
00485       if (y == 0)
00486          logLambda += yexp;
00487       else
00488          logLambda += yexp - y + y * log(y / yexp);
00489    }
00490 
00491    // rescale
00492    logLambda *= 2.0;
00493 
00494    // compute degrees of freedom for Poisson case
00495    int nDoF = this->GetNDataPoints() - this->GetNParameters();
00496 
00497    //p value from chi^2 distribution, returns zero if logLambda < 0
00498    fPValueNDoF = TMath::Prob(logLambda, nDoF);
00499    pvalue = fPValueNDoF;
00500 
00501    // no error
00502    return 1;
00503 }
00504 
00505 int BCHistogramFitter::CalculatePValueLeastSquares(std::vector<double> par,
00506       double &pvalue, bool weightExpect)
00507 {
00508    // initialize test statistic chi^2
00509    double chi2 = 0.0;
00510 
00511    // set the parameters of the function
00512    fFitFunction -> SetParameters(&par[0]);
00513 
00514    // get the number of bins
00515    int nbins = fHistogram -> GetNbinsX();
00516 
00517    // get bin width
00518    double dx = fHistogram -> GetBinWidth(1);
00519 
00520    // get function value of lower bin edge
00521    double fedgelow = fFitFunction -> Eval(fHistogram -> GetBinLowEdge(1));
00522 
00523    // loop over all bins, skip underflow
00524    for (int ibin = 1; ibin <= nbins; ++ibin) {
00525       // get upper bin edge
00526       double xedgehi = fHistogram -> GetBinLowEdge(ibin) + dx;
00527 
00528       // get function value at upper bin edge
00529       double fedgehi = fFitFunction -> Eval(xedgehi);
00530 
00531       // get the number of observed events
00532       double y = fHistogram -> GetBinContent(ibin);
00533 
00534       double yexp = 0.;
00535 
00536       // use ROOT's TH1D::Integral method
00537       if (fFlagIntegration)
00538          yexp = fFitFunction -> Integral(xedgehi - dx, xedgehi);
00539 
00540       // use linear interpolation
00541       else {
00542          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00543 
00544          // make the upper edge the lower edge for the next iteration
00545          fedgelow = fedgehi;
00546       }
00547 
00548       //convert 1/0.0 into 1 for weighted sum
00549       double weight;
00550       if (weightExpect)
00551          weight = (yexp > 0) ? yexp : 1.0;
00552       else
00553          weight = (y > 0) ? y : 1.0;
00554 
00555       // get the contribution from this datapoint
00556       chi2 += (y - yexp) * (y - yexp) / weight;
00557    }
00558 
00559    // compute degrees of freedom for Poisson case
00560    int nDoF = this->GetNDataPoints() - this->GetNParameters();
00561 
00562    // p value from chi^2 distribution, returns zero if logLambda < 0
00563    fPValueNDoF = TMath::Prob(chi2, nDoF);
00564    pvalue = fPValueNDoF;
00565 
00566    // no error
00567    return 1;
00568 }
00569 
00570 double BCHistogramFitter::CDF(const std::vector<double>& parameters, int index,
00571       bool lower)
00572 {
00573 
00574    if (parameters.empty())
00575       BCLog::OutWarning("BCHistogramFitter::CDF: parameter vector empty!");
00576    //histogram stores underflow in bin 0, so datapoint 0 is in bin 1
00577    index++;
00578 
00579    // get the number of observed events (should be integer)
00580    double yObs = fHistogram -> GetBinContent(index);
00581 
00582    // if(double( (unsigned int)yObs)==yObs)
00583    //    BCLog::OutWarning(Form(
00584    //          "BCHistogramFitter::CDF: using bin count %f that  is not an integer!",yObs));
00585 
00586    // get function value of lower bin edge
00587    double edgeLow = fHistogram -> GetBinLowEdge(index);
00588    double edgeHigh = edgeLow + fHistogram -> GetBinWidth(index);
00589 
00590    // expectation value of this bin
00591    double yExp = 0.0;
00592 
00593    // use ROOT's TH1D::Integral method
00594    if (fFlagIntegration)
00595       yExp = fFitFunction -> Integral(edgeLow, edgeHigh, &parameters[0]);
00596 
00597    // use linear interpolation
00598    else {
00599       double dx = fHistogram -> GetBinWidth(index);
00600       double fEdgeLow = fFitFunction -> Eval(edgeLow);
00601       double fEdgeHigh = fFitFunction -> Eval(edgeHigh);
00602       yExp = fEdgeLow * dx + (fEdgeHigh - fEdgeLow) * dx / 2.;
00603    }
00604 
00605    // usually Poisson bins do not agree with fixed probability bins
00606    // so choose where it should belong
00607 
00608    if (lower) {
00609       if ((int) yObs >= 1)
00610          return ROOT::Math::poisson_cdf((unsigned int) (yObs - 1), yExp);
00611       else
00612          return 0.0;
00613    }
00614    // what if yObs as double doesn't reprepsent a whole number? exceptioN?
00615    if ((double) (unsigned int) yObs != yObs){
00616       BCLog::OutWarning(Form(
00617             "BCHistogramFitter::CDF: Histogram values should be integer!\n"
00618                " Bin %d = %f", index, yObs));
00619 
00620       //convert randomly to integer
00621       // ex. yObs = 9.785 =>
00622 //      yObs --> 10 with P = 78.5%
00623 //      yObs --> 9  with P = 21.5%
00624       double U = fRandom -> Rndm() ;
00625       double yObsLower = (unsigned int)yObs;
00626       if( U > (yObs - yObsLower) )
00627          yObs = yObsLower;
00628       else
00629          yObs = yObsLower + 1;
00630    }
00631 
00632 // BCLog::OutDebug(Form("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00633 // BCLog::Out(TString::Format("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00634 
00635 return ROOT::Math::poisson_cdf(yObs, yExp);
00636 }
00637 
00638 // ---------------------------------------------------------

Generated on Thu Feb 11 11:29:30 2010 for BayesianAnalysisToolkit by  doxygen 1.5.1