• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BCEfficiencyFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TH2D.h>
00012 #include <TF1.h>
00013 #include <TGraph.h>
00014 #include <TGraphAsymmErrors.h>
00015 #include <TString.h>
00016 #include <TPad.h>
00017 #include <TRandom3.h>
00018 #include <TLegend.h>
00019 #include <TMath.h>
00020 
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 #include "BAT/BCH1D.h"
00026 
00027 #include "BCEfficiencyFitter.h"
00028 
00029 // ---------------------------------------------------------
00030 
00031 BCEfficiencyFitter::BCEfficiencyFitter() : BCModel("EfficiencyFitter")
00032 {
00033    fHistogram1 = 0;
00034    fHistogram2 = 0;
00035    fHistogramBinomial = 0;
00036    fFitFunction = 0;
00037 
00038    fDataPointType = 1;
00039 
00040    // set default options and values
00041    MCMCSetNIterationsRun(2000);
00042    MCMCSetRValueCriterion(0.01);
00043    SetFillErrorBand();
00044    fFlagIntegration = false;
00045 }
00046 
00047 // ---------------------------------------------------------
00048 
00049 BCEfficiencyFitter::BCEfficiencyFitter(TH1D * hist1, TH1D * hist2, TF1 * func) : BCModel("EfficiencyFitter")
00050 {
00051    fHistogram1 = 0;
00052    fHistogram2 = 0;
00053    fHistogramBinomial = 0;
00054    fFitFunction = 0;
00055 
00056    fDataPointType = 1;
00057 
00058    SetHistograms(hist1, hist2);
00059    SetFitFunction(func);
00060 
00061    MCMCSetNIterationsRun(2000);
00062    MCMCSetRValueCriterion(0.01);
00063    SetFillErrorBand();
00064    fFlagIntegration = false;
00065 }
00066 
00067 // ---------------------------------------------------------
00068 
00069 int BCEfficiencyFitter::SetHistograms(TH1D * hist1, TH1D * hist2)
00070 {
00071    // check if histogram exists
00072    if (!hist1 || !hist2) {
00073       BCLog::OutError("BCEfficiencyFitter::SetHistograms : TH1D not created.");
00074       return 0;
00075    }
00076 
00077    // check compatibility of both histograms : number of bins
00078    if (hist1->GetNbinsX() != hist2->GetNbinsX()) {
00079       BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histograms do not have the same number of bins.");
00080       return 0;
00081    }
00082 
00083    // check compatibility of both histograms : bin content
00084    for (int i = 1; i <= hist1->GetNbinsX(); ++i)
00085       if (hist1->GetBinContent(i) < hist2->GetBinContent(i)) {
00086          BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histogram 1 has fewer entries than histogram 2.");
00087          return 0;
00088       }
00089 
00090    // set pointer to histograms
00091    fHistogram1 = hist1;
00092    fHistogram2 = hist2;
00093    
00094    // create a data set. this is necessary in order to calculate the
00095    // error band. the data set contains as many data points as there
00096    // are bins. for now, the data points are empty.
00097    BCDataSet * ds = new BCDataSet();
00098 
00099    // create data points and add them to the data set.
00100    int nbins = fHistogram1->GetNbinsX();
00101    for (int i = 0; i < nbins; ++i) {
00102       BCDataPoint* dp = new BCDataPoint(2);
00103       ds->AddDataPoint(dp);
00104    }
00105 
00106    // set the new data set.
00107    SetDataSet(ds);
00108 
00109    // calculate the lower and upper edge in x.
00110    double xmin = hist1->GetBinLowEdge(1);
00111    double xmax = hist1->GetBinLowEdge(nbins+1);
00112 
00113 // // calculate the minimum and maximum range in y.
00114 // double histymin = hist2->GetMinimum();
00115 // double histymax = hist1->GetMaximum();
00116 
00117 // // calculate the minimum and maximum of the function value based on
00118 // // the minimum and maximum value in y.
00119 // double ymin = TMath::Max(0., histymin - 5.*sqrt(histymin));
00120 // double ymax = histymax + 5.*sqrt(histymax);
00121 
00122    // set the data boundaries for x and y values.
00123    SetDataBoundaries(0, xmin, xmax);
00124    SetDataBoundaries(1, 0.0, 1.0);
00125 
00126    // set the indeces for fitting.
00127    SetFitFunctionIndices(0, 1);
00128 
00129    // no error
00130    return 1;
00131 }
00132 
00133 // ---------------------------------------------------------
00134 
00135 int BCEfficiencyFitter::SetFitFunction(TF1 * func)
00136 {
00137    // check if function exists
00138    if(!func) {
00139       BCLog::OutError("BCEfficiencyFitter::SetFitFunction : TF1 not created.");
00140       return 0;
00141    }
00142 
00143    // set the function
00144    fFitFunction = func;
00145 
00146    // update the model name to contain the function name
00147    SetName(TString::Format("BCEfficiencyFitter with %s",fFitFunction->GetName()));
00148 
00149    // reset parameters
00150    fParameterSet->clear();
00151 
00152    // get the new number of parameters
00153    int n = func->GetNpar();
00154 
00155    // add parameters
00156    for (int i = 0; i < n; ++i)
00157    {
00158       double xmin;
00159       double xmax;
00160 
00161       func->GetParLimits(i, xmin, xmax);
00162 
00163       AddParameter(func->GetParName(i), xmin, xmax);
00164    }
00165 
00166    // set flat prior for all parameters by default
00167    SetPriorConstantAll();
00168 
00169    return GetNParameters();;
00170 }
00171 
00172 // ---------------------------------------------------------
00173 
00174 BCEfficiencyFitter::~BCEfficiencyFitter()
00175 {
00176    if (fHistogram1)
00177       delete fHistogram1;
00178 
00179    if (fHistogram2)
00180       delete fHistogram2;
00181 
00182    if (fHistogramBinomial)
00183       delete fHistogramBinomial;
00184 }
00185 
00186 // ---------------------------------------------------------
00187 
00188 void BCEfficiencyFitter::SetDataPointType(int type)
00189 {
00190    if(type < 0 || type > 2)
00191       BCLog::OutError(Form("BCEfficiencyFitter::SetDataPointType : Unknown data point type %d (should be between 0 and 2)."));
00192    else
00193       fDataPointType = type;
00194 }
00195 
00196 // ---------------------------------------------------------
00197 
00198 double BCEfficiencyFitter::LogLikelihood(std::vector <double> params)
00199 {
00200 
00201    // initialize probability
00202    double loglikelihood = 0;
00203 
00204    // set the parameters of the function
00205    fFitFunction->SetParameters(&params[0]);
00206 
00207    // get the number of bins
00208    int nbins = fHistogram1->GetNbinsX();
00209 
00210    // get bin width
00211    double dx = fHistogram1->GetXaxis()->GetBinWidth(0);
00212 
00213    // loop over all bins
00214    for (int ibin = 1; ibin <= nbins; ++ibin)
00215    {
00216       // get n
00217       int n = int(fHistogram1->GetBinContent(ibin));
00218 
00219       // get k
00220       int k = int(fHistogram2->GetBinContent(ibin));
00221 
00222       // get x
00223       double x = fHistogram1->GetBinCenter(ibin);
00224 
00225       double eff = 0;
00226 
00227       // use ROOT's TH1D::Integral method
00228       if (fFlagIntegration)
00229          eff = fFitFunction->Integral(x - dx/2., x + dx/2.) / dx;
00230 
00231       // use linear interpolation
00232       else
00233          eff = (fFitFunction->Eval(x + dx/2.) + fFitFunction->Eval(x - dx/2.)) / 2.;
00234 
00235       // get the value of the Poisson distribution
00236       loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
00237    }
00238 
00239    return loglikelihood;
00240 }
00241 
00242 // ---------------------------------------------------------
00243 
00244 double BCEfficiencyFitter::FitFunction(std::vector <double> x, std::vector <double> params)
00245 {
00246    // set the parameters of the function
00247    fFitFunction->SetParameters(&params[0]);
00248 
00249    return fFitFunction->Eval(x[0]);
00250 }
00251 
00252 // ---------------------------------------------------------
00253 
00254 int BCEfficiencyFitter::Fit(TH1D * hist1, TH1D * hist2, TF1 * func)
00255 {
00256    // set histogram
00257    if (hist1 && hist2)
00258       SetHistograms(hist1, hist2);
00259    else {
00260       BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00261       return 0;
00262    }
00263 
00264    // set function
00265    if (func)
00266       SetFitFunction(func);
00267    else {
00268       BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00269       return 0;
00270    }
00271 
00272    return Fit();
00273 }
00274 
00275 // ---------------------------------------------------------
00276 
00277 int BCEfficiencyFitter::Fit()
00278 {
00279    // set histogram
00280    if (!fHistogram1 || !fHistogram2) {
00281       BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00282       return 0;
00283    }
00284 
00285    // set function
00286    if (!fFitFunction) {
00287       BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00288       return 0;
00289    }
00290 
00291    // perform marginalization
00292    MarginalizeAll();
00293 
00294    // maximize posterior probability, using the best-fit values close
00295    // to the global maximum from the MCMC
00296    FindModeMinuit( GetBestFitParameters() , -1);
00297 
00298    // calculate the p-value using the fast MCMC algorithm
00299    double pvalue;
00300    if ( CalculatePValueFast(GetBestFitParameters(), pvalue) )
00301       fPValue = pvalue;
00302    else
00303       BCLog::OutError("BCEfficiencyFitter::Fit : Could not use the fast p-value evaluation.");
00304 
00305    // print summary to screen
00306    PrintShortFitSummary();
00307 
00308    // no error
00309    return 1;
00310 }
00311 
00312 // ---------------------------------------------------------
00313 
00314 void BCEfficiencyFitter::DrawFit(const char * options, bool flaglegend)
00315 {
00316    if (!fHistogram1 || !fHistogram2) {
00317       BCLog::OutError("BCEfficiencyFitter::DrawFit : Histogram(s) not defined.");
00318       return;
00319    }
00320 
00321    if (!fFitFunction) {
00322       BCLog::OutError("BCEfficiencyFitter::DrawFit : Fit function not defined.");
00323       return;
00324    }
00325 
00326    // create efficiency graph
00327    TGraphAsymmErrors * histRatio = new TGraphAsymmErrors();
00328    histRatio->SetMarkerStyle(20);
00329    histRatio->SetMarkerSize(1.5);
00330 
00331    int npoints = 0;
00332 
00333    // set points
00334    for (int i = 1; i <= fHistogram1->GetNbinsX(); ++i)
00335    {
00336       if(int(fHistogram1->GetBinContent(i)) == 0) {
00337          ++npoints;
00338          continue;
00339       }
00340 
00341       // calculate central value and uncertainties
00342       double xexp, xmin, xmax;
00343       GetUncertainties( int(fHistogram1->GetBinContent(i)), int(fHistogram2->GetBinContent(i)), 0.68, xexp, xmin, xmax);
00344 
00345       histRatio->SetPoint( npoints, fHistogram1->GetBinCenter(i), xexp);
00346 
00347       // no X uncertainties
00348       histRatio->SetPointEXhigh(npoints, 0.);
00349       histRatio->SetPointEXlow(npoints, 0.);
00350 
00351       // set Y uncertainties
00352       histRatio->SetPointEYhigh(npoints, xmax - xexp);
00353       histRatio->SetPointEYlow(npoints, xexp - xmin);
00354 
00355       ++npoints;
00356    }
00357 
00358 
00359    // check wheather options contain "same"
00360    TString opt = options;
00361    opt.ToLower();
00362 
00363    // if not same, draw the histogram first to get the axes
00364    if(!opt.Contains("same"))
00365    {
00366       // create new histogram
00367       TH2D * hist_axes = new TH2D("hist_axes",
00368             Form(";%s;ratio", fHistogram1->GetXaxis()->GetTitle()),
00369             fHistogram1->GetNbinsX(),
00370             fHistogram1->GetXaxis()->GetBinLowEdge(1),
00371             fHistogram1->GetXaxis()->GetBinLowEdge(fHistogram1->GetNbinsX()+1),
00372             1, 0., 1.);
00373       hist_axes->SetStats(kFALSE);
00374       hist_axes->Draw();
00375 
00376       histRatio->Draw(TString::Format("%sp",opt.Data()));
00377    }
00378 
00379    // draw the error band as central 68% probability interval
00380    fErrorBand = GetErrorBandGraph(0.16, 0.84);
00381    fErrorBand->Draw("f same");
00382 
00383    // now draw the histogram again since it was covered by the band
00384    histRatio->SetMarkerSize(.7);
00385    histRatio->Draw(TString::Format("%ssamep",opt.Data()));
00386 
00387    // draw the fit function on top
00388    fGraphFitFunction = GetFitFunctionGraph( GetBestFitParameters() );
00389    fGraphFitFunction->SetLineColor(kRed);
00390    fGraphFitFunction->SetLineWidth(2);
00391    fGraphFitFunction->Draw("l same");
00392 
00393    // draw legend
00394    if (flaglegend)
00395    {
00396       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00397       legend->SetBorderSize(0);
00398       legend->SetFillColor(kWhite);
00399       legend->AddEntry(histRatio, "Data", "L");
00400       legend->AddEntry(fGraphFitFunction, "Best fit", "L");
00401       legend->AddEntry(fErrorBand, "Error band", "F");
00402       legend->Draw();
00403    }
00404    gPad->RedrawAxis();
00405 }
00406 
00407 // ---------------------------------------------------------
00408 int BCEfficiencyFitter::CalculatePValueFast(std::vector<double> par, double &pvalue)
00409 {
00410    // check size of parameter vector
00411    if (par.size() != GetNParameters()) {
00412       BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Number of parameters is inconsistent.");
00413       return 0;
00414    }
00415 
00416    // check if histogram exists
00417    if (!fHistogram1 || !fHistogram2) {
00418       BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Histogram not defined.");
00419       return 0;
00420    }
00421 
00422    // define temporary variables
00423    int nbins = fHistogram1->GetNbinsX();
00424 
00425    std::vector <int> histogram;
00426    std::vector <double> expectation;
00427    histogram.assign(nbins, 0);
00428    expectation.assign(nbins, 0);
00429 
00430    double logp = 0;
00431    double logp_start = 0;
00432    int counter_pvalue = 0;
00433 
00434    // define starting distribution
00435    for (int ibin = 0; ibin < nbins; ++ibin) {
00436       // get bin boundaries
00437       double xmin = fHistogram1->GetBinLowEdge(ibin+1);
00438       double xmax = fHistogram1->GetBinLowEdge(ibin+2);
00439 
00440       // get the number of expected events
00441       double yexp = fFitFunction->Integral(xmin, xmax);
00442 
00443       // get n
00444       int n = int(fHistogram1->GetBinContent(ibin));
00445 
00446       // get k
00447       int k = int(fHistogram2->GetBinContent(ibin));
00448 
00449       histogram[ibin]   = k;
00450       expectation[ibin] = n * yexp;
00451 
00452       // calculate p;
00453       logp += BCMath::LogApproxBinomial(n, k, yexp);
00454    }
00455    logp_start = logp;
00456 
00457    int niter = 100000;
00458 
00459    // loop over iterations
00460    for (int iiter = 0; iiter < niter; ++iiter)
00461    {
00462       // loop over bins
00463       for (int ibin = 0; ibin < nbins; ++ibin)
00464       {
00465          // get n
00466          int n = int(fHistogram1->GetBinContent(ibin));
00467 
00468          // get k
00469          int k = histogram.at(ibin);
00470 
00471          // get expectation
00472          double yexp = 0;
00473          if (n > 0)
00474             yexp = expectation.at(ibin)/n;
00475 
00476          // random step up or down in statistics for this bin
00477          double ptest = fRandom->Rndm() - 0.5;
00478 
00479          // continue, if efficiency is at limit
00480          if (!(yexp > 0. || yexp < 1.0))
00481             continue;
00482 
00483          // increase statistics by 1
00484          if (ptest > 0 && (k < n))
00485          {
00486             // calculate factor of probability
00487             double r = (double(n)-double(k))/(double(k)+1.) * yexp / (1. - yexp);
00488 
00489             // walk, or don't (this is the Metropolis part)
00490             if (fRandom->Rndm() < r)
00491             {
00492                histogram[ibin] = k + 1;
00493                logp += log(r);
00494             }
00495          }
00496 
00497          // decrease statistics by 1
00498          else if (ptest <= 0 && (k > 0))
00499          {
00500             // calculate factor of probability
00501             double r = double(k) / (double(n)-(double(k)-1)) * (1-yexp)/yexp;
00502 
00503             // walk, or don't (this is the Metropolis part)
00504             if (fRandom->Rndm() < r)
00505             {
00506                histogram[ibin] = k - 1;
00507                logp += log(r);
00508             }
00509          }
00510 
00511       } // end of looping over bins
00512 
00513       // increase counter
00514       if (logp < logp_start)
00515          counter_pvalue++;
00516 
00517    } // end of looping over iterations
00518 
00519    // calculate p-value
00520    pvalue = double(counter_pvalue) / double(niter);
00521 
00522    // no error
00523    return 1;
00524 }
00525 
00526 // ---------------------------------------------------------
00527 int BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
00528 {
00529    if (n == 0)
00530    {
00531       xexp = 0.;
00532       xmin = 0.;
00533       xmax = 0.;
00534       return 0;
00535    }
00536 
00537    BCLog::OutDebug(Form("Calculating efficiency data-point of type %d for (n,k) = (%d,%d)",fDataPointType,n,k));
00538 
00539    // create a histogram with binomial distribution
00540    if (fHistogramBinomial)
00541       fHistogramBinomial->Reset();
00542    else
00543       fHistogramBinomial = new TH1D("hist_binomial", "", 1000, 0., 1.);
00544 
00545    // loop over bins and fill histogram
00546    for (int i = 1; i <= 1000; ++i) {
00547       double x   = fHistogramBinomial->GetBinCenter(i);
00548       double val = BCMath::ApproxBinomial(n, k, x);
00549       fHistogramBinomial->SetBinContent(i, val);
00550    }
00551 
00552    // normalize
00553    fHistogramBinomial->Scale(1. / fHistogramBinomial->Integral());
00554 
00555    // calculate central value and uncertainties
00556    if (fDataPointType == 0) {
00557       xexp = fHistogramBinomial->GetMean();
00558       double rms = fHistogramBinomial->GetRMS();
00559       xmin = xexp-rms;
00560       xmax = xexp+rms;
00561       BCLog::OutDebug(Form(" - mean = %f , rms = %f)",xexp,rms));
00562    }
00563    else if (fDataPointType == 1) {
00564       xexp = (double)k/(double)n;
00565       BCH1D * fbh = new BCH1D((TH1D*)fHistogramBinomial->Clone("hcp"));
00566       std::vector<double> intervals = fbh->GetSmallestIntervals(p);
00567       int ninter = intervals.size();
00568       if ( ninter<2 ) {
00569          xmin = xmax = xexp = 0.;
00570       }
00571       else {
00572          xmin = intervals[0];
00573          xmax = intervals[1];
00574       }
00575       delete fbh;
00576    }
00577    else {
00578       // calculate quantiles
00579       int nprobSum = 3;
00580       double q[3];
00581       double probSum[3];
00582       probSum[0] = (1.-p)/2.;
00583       probSum[1] = 1.-(1.-p)/2.;
00584       probSum[2] = .5;
00585 
00586       fHistogramBinomial->GetQuantiles(nprobSum, q, probSum);
00587 
00588       xexp = q[2];
00589       xmin = q[0];
00590       xmax = q[1];
00591 
00592    }
00593 
00594    BCLog::OutDebug(Form(" - efficiency = %f , range (%f - %f)",xexp,xmin,xmax));
00595 
00596    return 1;
00597 }
00598 
00599 // ---------------------------------------------------------

Generated on Fri Nov 19 2010 17:02:52 for Bayesian Analysis Toolkit by  doxygen 1.7.1