• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BCEfficiencyFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008-2012, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TH2D.h>
00012 #include <TF1.h>
00013 #include <TGraph.h>
00014 #include <TGraphAsymmErrors.h>
00015 #include <TString.h>
00016 #include <TPad.h>
00017 #include <TRandom3.h>
00018 #include <TLegend.h>
00019 #include <TMath.h>
00020 
00021 #include "../../BAT/BCLog.h"
00022 #include "../../BAT/BCDataSet.h"
00023 #include "../../BAT/BCDataPoint.h"
00024 #include "../../BAT/BCMath.h"
00025 #include "../../BAT/BCH1D.h"
00026 
00027 #include "BCEfficiencyFitter.h"
00028 
00029 // ---------------------------------------------------------
00030 
00031 BCEfficiencyFitter::BCEfficiencyFitter()
00032  : BCModel()
00033  , fHistogram1(0)
00034  , fHistogram2(0)
00035  , fFitFunction(0)
00036  , fHistogramBinomial(0)
00037  , fDataPointType(1)
00038 {
00039    // set default options and values
00040    MCMCSetNIterationsRun(2000);
00041    MCMCSetRValueCriterion(0.01);
00042    SetFillErrorBand();
00043    fFlagIntegration = false;
00044 }
00045 
00046 // ---------------------------------------------------------
00047 
00048 BCEfficiencyFitter::BCEfficiencyFitter(const char * name)
00049  : BCModel(name)
00050  , fHistogram1(0)
00051  , fHistogram2(0)
00052  , fFitFunction(0)
00053  , fHistogramBinomial(0)
00054  , fDataPointType(1)
00055 {
00056    // set default options and values
00057    MCMCSetNIterationsRun(2000);
00058    MCMCSetRValueCriterion(0.01);
00059    SetFillErrorBand();
00060    fFlagIntegration = false;
00061 }
00062 
00063 // ---------------------------------------------------------
00064 
00065 BCEfficiencyFitter::BCEfficiencyFitter(TH1D * hist1, TH1D * hist2, TF1 * func)
00066  : BCModel()
00067  , fHistogram1(0)
00068  , fHistogram2(0)
00069  , fFitFunction(0)
00070  , fHistogramBinomial(0)
00071  , fDataPointType(1)
00072 {
00073    SetHistograms(hist1, hist2);
00074    SetFitFunction(func);
00075 
00076    MCMCSetNIterationsRun(2000);
00077    MCMCSetRValueCriterion(0.01);
00078    SetFillErrorBand();
00079    fFlagIntegration = false;
00080 }
00081 
00082 // ---------------------------------------------------------
00083 
00084 BCEfficiencyFitter::BCEfficiencyFitter(const char * name, TH1D * hist1, TH1D * hist2, TF1 * func)
00085  : BCModel(name)
00086  , fHistogram1(0)
00087  , fHistogram2(0)
00088  , fFitFunction(0)
00089  , fHistogramBinomial(0)
00090  , fDataPointType(1)
00091 {
00092    SetHistograms(hist1, hist2);
00093    SetFitFunction(func);
00094 
00095    MCMCSetNIterationsRun(2000);
00096    MCMCSetRValueCriterion(0.01);
00097    SetFillErrorBand();
00098    fFlagIntegration = false;
00099 }
00100 
00101 // ---------------------------------------------------------
00102 
00103 int BCEfficiencyFitter::SetHistograms(TH1D * hist1, TH1D * hist2)
00104 {
00105    // check if histogram exists
00106    if (!hist1 || !hist2) {
00107       BCLog::OutError("BCEfficiencyFitter::SetHistograms : TH1D not created.");
00108       return 0;
00109    }
00110 
00111    // check compatibility of both histograms : number of bins
00112    if (hist1->GetNbinsX() != hist2->GetNbinsX()) {
00113       BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histograms do not have the same number of bins.");
00114       return 0;
00115    }
00116 
00117    // check compatibility of both histograms : bin content
00118    for (int i = 1; i <= hist1->GetNbinsX(); ++i)
00119       if (hist1->GetBinContent(i) < hist2->GetBinContent(i)) {
00120          BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histogram 1 has fewer entries than histogram 2.");
00121          return 0;
00122       }
00123 
00124    // set pointer to histograms
00125    fHistogram1 = hist1;
00126    fHistogram2 = hist2;
00127 
00128    // create a data set. this is necessary in order to calculate the
00129    // error band. the data set contains as many data points as there
00130    // are bins. for now, the data points are empty.
00131    BCDataSet * ds = new BCDataSet();
00132 
00133    // create data points and add them to the data set.
00134    int nbins = fHistogram1->GetNbinsX();
00135    for (int i = 0; i < nbins; ++i) {
00136       BCDataPoint* dp = new BCDataPoint(2);
00137       ds->AddDataPoint(dp);
00138    }
00139 
00140    // set the new data set.
00141    SetDataSet(ds);
00142 
00143    // calculate the lower and upper edge in x.
00144    double xmin = hist1->GetBinLowEdge(1);
00145    double xmax = hist1->GetBinLowEdge(nbins+1);
00146 
00147 //   // calculate the minimum and maximum range in y.
00148 //   double histymin = hist2->GetMinimum();
00149 //   double histymax = hist1->GetMaximum();
00150 
00151 //   // calculate the minimum and maximum of the function value based on
00152 //   // the minimum and maximum value in y.
00153 //   double ymin = TMath::Max(0., histymin - 5.*sqrt(histymin));
00154 //   double ymax = histymax + 5.*sqrt(histymax);
00155 
00156    // set the data boundaries for x and y values.
00157    SetDataBoundaries(0, xmin, xmax);
00158    SetDataBoundaries(1, 0.0, 1.0);
00159 
00160    // set the indeces for fitting.
00161    SetFitFunctionIndices(0, 1);
00162 
00163    // no error
00164    return 1;
00165 }
00166 
00167 // ---------------------------------------------------------
00168 
00169 int BCEfficiencyFitter::SetFitFunction(TF1 * func)
00170 {
00171    // check if function exists
00172    if(!func) {
00173       BCLog::OutError("BCEfficiencyFitter::SetFitFunction : TF1 not created.");
00174       return 0;
00175    }
00176 
00177    // set the function
00178    fFitFunction = func;
00179 
00180    // update the model name to contain the function name
00181    if(fName=="model")
00182       SetName(TString::Format("BCEfficiencyFitter with %s",fFitFunction->GetName()));
00183 
00184    // reset parameters
00185    fParameterSet->clear();
00186 
00187    // get the new number of parameters
00188    int n = func->GetNpar();
00189 
00190    // add parameters
00191    for (int i = 0; i < n; ++i)
00192    {
00193       double xmin;
00194       double xmax;
00195 
00196       func->GetParLimits(i, xmin, xmax);
00197 
00198       AddParameter(func->GetParName(i), xmin, xmax);
00199    }
00200 
00201    // set flat prior for all parameters by default
00202    SetPriorConstantAll();
00203 
00204    return GetNParameters();;
00205 }
00206 
00207 // ---------------------------------------------------------
00208 
00209 BCEfficiencyFitter::~BCEfficiencyFitter()
00210 {
00211    if (fHistogram1)
00212       delete fHistogram1;
00213 
00214    if (fHistogram2)
00215       delete fHistogram2;
00216 
00217    if (fHistogramBinomial)
00218       delete fHistogramBinomial;
00219 }
00220 
00221 // ---------------------------------------------------------
00222 
00223 void BCEfficiencyFitter::SetDataPointType(int type)
00224 {
00225    if(type < 0 || type > 2)
00226       BCLog::OutError(Form("BCEfficiencyFitter::SetDataPointType : Unknown data point type %d (should be between 0 and 2).",type));
00227    else
00228       fDataPointType = type;
00229 }
00230 
00231 // ---------------------------------------------------------
00232 
00233 double BCEfficiencyFitter::LogLikelihood(const std::vector<double> & params)
00234 {
00235 
00236    // initialize probability
00237    double loglikelihood = 0;
00238 
00239    // set the parameters of the function
00240    fFitFunction->SetParameters(&params[0]);
00241 
00242    // get the number of bins
00243    int nbins = fHistogram1->GetNbinsX();
00244 
00245    // get bin width
00246    double dx = fHistogram1->GetXaxis()->GetBinWidth(0);
00247 
00248    // loop over all bins
00249    for (int ibin = 1; ibin <= nbins; ++ibin)
00250    {
00251       // get n
00252       int n = int(fHistogram1->GetBinContent(ibin));
00253 
00254       // get k
00255       int k = int(fHistogram2->GetBinContent(ibin));
00256 
00257       // get x
00258       double x = fHistogram1->GetBinCenter(ibin);
00259 
00260       double eff = 0;
00261 
00262       // use ROOT's TH1D::Integral method
00263       if (fFlagIntegration)
00264          eff = fFitFunction->Integral(x - dx/2., x + dx/2.) / dx;
00265 
00266       // use linear interpolation
00267       else
00268          eff = (fFitFunction->Eval(x + dx/2.) + fFitFunction->Eval(x - dx/2.)) / 2.;
00269 
00270       // get the value of the Poisson distribution
00271       loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
00272    }
00273 
00274    return loglikelihood;
00275 }
00276 
00277 // ---------------------------------------------------------
00278 
00279 double BCEfficiencyFitter::FitFunction(const std::vector<double> & x, const std::vector<double> & params)
00280 {
00281    // set the parameters of the function
00282    fFitFunction->SetParameters(&params[0]);
00283 
00284    return fFitFunction->Eval(x[0]);
00285 }
00286 
00287 // ---------------------------------------------------------
00288 
00289 int BCEfficiencyFitter::Fit(TH1D * hist1, TH1D * hist2, TF1 * func)
00290 {
00291    // set histogram
00292    if (hist1 && hist2)
00293       SetHistograms(hist1, hist2);
00294    else {
00295       BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00296       return 0;
00297    }
00298 
00299    // set function
00300    if (func)
00301       SetFitFunction(func);
00302    else {
00303       BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00304       return 0;
00305    }
00306 
00307    return Fit();
00308 }
00309 
00310 // ---------------------------------------------------------
00311 
00312 int BCEfficiencyFitter::Fit()
00313 {
00314    // set histogram
00315    if (!fHistogram1 || !fHistogram2) {
00316       BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00317       return 0;
00318    }
00319 
00320    // set function
00321    if (!fFitFunction) {
00322       BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00323       return 0;
00324    }
00325 
00326    // perform marginalization
00327    MarginalizeAll();
00328 
00329    // maximize posterior probability, using the best-fit values close
00330    // to the global maximum from the MCMC
00331    FindModeMinuit( GetBestFitParameters() , -1);
00332 
00333    // calculate the p-value using the fast MCMC algorithm
00334    double pvalue;
00335    if ( CalculatePValueFast(GetBestFitParameters(), pvalue) )
00336       fPValue = pvalue;
00337    else
00338       BCLog::OutError("BCEfficiencyFitter::Fit : Could not use the fast p-value evaluation.");
00339 
00340    // print summary to screen
00341    PrintShortFitSummary();
00342 
00343    // no error
00344    return 1;
00345 }
00346 
00347 // ---------------------------------------------------------
00348 
00349 void BCEfficiencyFitter::DrawFit(const char * options, bool flaglegend)
00350 {
00351    if (!fHistogram1 || !fHistogram2) {
00352       BCLog::OutError("BCEfficiencyFitter::DrawFit : Histogram(s) not defined.");
00353       return;
00354    }
00355 
00356    if (!fFitFunction) {
00357       BCLog::OutError("BCEfficiencyFitter::DrawFit : Fit function not defined.");
00358       return;
00359    }
00360 
00361    // create efficiency graph
00362    TGraphAsymmErrors * histRatio = new TGraphAsymmErrors();
00363    histRatio->SetMarkerStyle(20);
00364    histRatio->SetMarkerSize(1.5);
00365 
00366    int npoints = 0;
00367 
00368    // set points
00369    for (int i = 1; i <= fHistogram1->GetNbinsX(); ++i)
00370    {
00371       if(int(fHistogram1->GetBinContent(i)) == 0) {
00372          ++npoints;
00373          continue;
00374       }
00375 
00376       // calculate central value and uncertainties
00377       double xexp, xmin, xmax;
00378       GetUncertainties( int(fHistogram1->GetBinContent(i)), int(fHistogram2->GetBinContent(i)), 0.68, xexp, xmin, xmax);
00379 
00380       histRatio->SetPoint( npoints, fHistogram1->GetBinCenter(i), xexp);
00381 
00382       // no X uncertainties
00383       histRatio->SetPointEXhigh(npoints, 0.);
00384       histRatio->SetPointEXlow(npoints, 0.);
00385 
00386       // set Y uncertainties
00387       histRatio->SetPointEYhigh(npoints, xmax - xexp);
00388       histRatio->SetPointEYlow(npoints, xexp - xmin);
00389 
00390       ++npoints;
00391    }
00392 
00393 
00394    // check wheather options contain "same"
00395    TString opt = options;
00396    opt.ToLower();
00397 
00398    // if not same, draw the histogram first to get the axes
00399    if(!opt.Contains("same"))
00400    {
00401       // create new histogram
00402       TH2D * hist_axes = new TH2D("hist_axes",
00403             Form(";%s;ratio", fHistogram1->GetXaxis()->GetTitle()),
00404             fHistogram1->GetNbinsX(),
00405             fHistogram1->GetXaxis()->GetBinLowEdge(1),
00406             fHistogram1->GetXaxis()->GetBinLowEdge(fHistogram1->GetNbinsX()+1),
00407             1, 0., 1.);
00408       hist_axes->SetStats(kFALSE);
00409       hist_axes->Draw();
00410 
00411       histRatio->Draw(TString::Format("%sp",opt.Data()));
00412    }
00413 
00414    // draw the error band as central 68% probability interval
00415    fErrorBand = GetErrorBandGraph(0.16, 0.84);
00416    fErrorBand->Draw("f same");
00417 
00418    // now draw the histogram again since it was covered by the band
00419    histRatio->SetMarkerSize(.7);
00420    histRatio->Draw(TString::Format("%ssamep",opt.Data()));
00421 
00422    // draw the fit function on top
00423    fGraphFitFunction = GetFitFunctionGraph( GetBestFitParameters() );
00424    fGraphFitFunction->SetLineColor(kRed);
00425    fGraphFitFunction->SetLineWidth(2);
00426    fGraphFitFunction->Draw("l same");
00427 
00428    // draw legend
00429    if (flaglegend)
00430    {
00431       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00432       legend->SetBorderSize(0);
00433       legend->SetFillColor(kWhite);
00434       legend->AddEntry(histRatio, "Data", "L");
00435       legend->AddEntry(fGraphFitFunction, "Best fit", "L");
00436       legend->AddEntry(fErrorBand, "Error band", "F");
00437       legend->Draw();
00438    }
00439    gPad->RedrawAxis();
00440 }
00441 
00442 // ---------------------------------------------------------
00443 int BCEfficiencyFitter::CalculatePValueFast(const std::vector<double> & par, double &pvalue)
00444 {
00445    // check size of parameter vector
00446    if (par.size() != GetNParameters()) {
00447       BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Number of parameters is inconsistent.");
00448       return 0;
00449    }
00450 
00451    // check if histogram exists
00452    if (!fHistogram1 || !fHistogram2) {
00453       BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Histogram not defined.");
00454       return 0;
00455    }
00456 
00457    // define temporary variables
00458    int nbins = fHistogram1->GetNbinsX();
00459 
00460    std::vector<int> histogram;
00461    std::vector<double> expectation;
00462    histogram.assign(nbins, 0);
00463    expectation.assign(nbins, 0);
00464 
00465    double logp = 0;
00466    double logp_start = 0;
00467    int counter_pvalue = 0;
00468 
00469    // define starting distribution
00470    for (int ibin = 0; ibin < nbins; ++ibin) {
00471       // get bin boundaries
00472       double xmin = fHistogram1->GetBinLowEdge(ibin+1);
00473       double xmax = fHistogram1->GetBinLowEdge(ibin+2);
00474 
00475       // get the number of expected events
00476       double yexp = fFitFunction->Integral(xmin, xmax);
00477 
00478       // get n
00479       int n = int(fHistogram1->GetBinContent(ibin));
00480 
00481       // get k
00482       int k = int(fHistogram2->GetBinContent(ibin));
00483 
00484       histogram[ibin]   = k;
00485       expectation[ibin] = n * yexp;
00486 
00487       // calculate p;
00488       logp += BCMath::LogApproxBinomial(n, k, yexp);
00489    }
00490    logp_start = logp;
00491 
00492    int niter = 100000;
00493 
00494    // loop over iterations
00495    for (int iiter = 0; iiter < niter; ++iiter)
00496    {
00497       // loop over bins
00498       for (int ibin = 0; ibin < nbins; ++ibin)
00499       {
00500          // get n
00501          int n = int(fHistogram1->GetBinContent(ibin));
00502 
00503          // get k
00504          int k = histogram.at(ibin);
00505 
00506          // get expectation
00507          double yexp = 0;
00508          if (n > 0)
00509             yexp = expectation.at(ibin)/n;
00510 
00511          // random step up or down in statistics for this bin
00512          double ptest = fRandom->Rndm() - 0.5;
00513 
00514          // continue, if efficiency is at limit
00515          if (!(yexp > 0. || yexp < 1.0))
00516             continue;
00517 
00518          // increase statistics by 1
00519          if (ptest > 0 && (k < n))
00520          {
00521             // calculate factor of probability
00522             double r = (double(n)-double(k))/(double(k)+1.) * yexp / (1. - yexp);
00523 
00524             // walk, or don't (this is the Metropolis part)
00525             if (fRandom->Rndm() < r)
00526             {
00527                histogram[ibin] = k + 1;
00528                logp += log(r);
00529             }
00530          }
00531 
00532          // decrease statistics by 1
00533          else if (ptest <= 0 && (k > 0))
00534          {
00535             // calculate factor of probability
00536             double r = double(k) / (double(n)-(double(k)-1)) * (1-yexp)/yexp;
00537 
00538             // walk, or don't (this is the Metropolis part)
00539             if (fRandom->Rndm() < r)
00540             {
00541                histogram[ibin] = k - 1;
00542                logp += log(r);
00543             }
00544          }
00545 
00546       } // end of looping over bins
00547 
00548       // increase counter
00549       if (logp < logp_start)
00550          counter_pvalue++;
00551 
00552    } // end of looping over iterations
00553 
00554    // calculate p-value
00555    pvalue = double(counter_pvalue) / double(niter);
00556 
00557    // no error
00558    return 1;
00559 }
00560 
00561 // ---------------------------------------------------------
00562 int BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
00563 {
00564    if (n == 0)
00565    {
00566       xexp = 0.;
00567       xmin = 0.;
00568       xmax = 0.;
00569       return 0;
00570    }
00571 
00572    BCLog::OutDebug(Form("Calculating efficiency data-point of type %d for (n,k) = (%d,%d)",fDataPointType,n,k));
00573 
00574    // create a histogram with binomial distribution
00575    if (fHistogramBinomial)
00576       fHistogramBinomial->Reset();
00577    else
00578       fHistogramBinomial = new TH1D("hist_binomial", "", 1000, 0., 1.);
00579 
00580    // loop over bins and fill histogram
00581    for (int i = 1; i <= 1000; ++i) {
00582       double x   = fHistogramBinomial->GetBinCenter(i);
00583       double val = BCMath::ApproxBinomial(n, k, x);
00584       fHistogramBinomial->SetBinContent(i, val);
00585    }
00586 
00587    // normalize
00588    fHistogramBinomial->Scale(1. / fHistogramBinomial->Integral());
00589 
00590    // calculate central value and uncertainties
00591    if (fDataPointType == 0) {
00592       xexp = fHistogramBinomial->GetMean();
00593       double rms = fHistogramBinomial->GetRMS();
00594       xmin = xexp-rms;
00595       xmax = xexp+rms;
00596       BCLog::OutDebug(Form(" - mean = %f , rms = %f)",xexp,rms));
00597    }
00598    else if (fDataPointType == 1) {
00599       xexp = (double)k/(double)n;
00600       BCH1D * fbh = new BCH1D((TH1D*)fHistogramBinomial->Clone("hcp"));
00601       std::vector<double> intervals = fbh->GetSmallestIntervals(p);
00602       int ninter = intervals.size();
00603       if ( ninter<2 ) {
00604          xmin = xmax = xexp = 0.;
00605       }
00606       else {
00607          xmin = intervals[0];
00608          xmax = intervals[1];
00609       }
00610       delete fbh;
00611    }
00612    else {
00613       // calculate quantiles
00614       int nprobSum = 3;
00615       double q[3];
00616       double probSum[3];
00617       probSum[0] = (1.-p)/2.;
00618       probSum[1] = 1.-(1.-p)/2.;
00619       probSum[2] = .5;
00620 
00621       fHistogramBinomial->GetQuantiles(nprobSum, q, probSum);
00622 
00623       xexp = q[2];
00624       xmin = q[0];
00625       xmax = q[1];
00626 
00627    }
00628 
00629    BCLog::OutDebug(Form(" - efficiency = %f , range (%f - %f)",xexp,xmin,xmax));
00630 
00631    return 1;
00632 }
00633 
00634 // ---------------------------------------------------------

Generated by  doxygen 1.7.1