00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <TH1D.h>
00011 #include <TH2D.h>
00012 #include <TF1.h>
00013 #include <TGraph.h>
00014 #include <TGraphAsymmErrors.h>
00015 #include <TString.h>
00016 #include <TPad.h>
00017 #include <TRandom3.h>
00018 #include <TLegend.h>
00019 #include <TMath.h>
00020
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 #include "BAT/BCH1D.h"
00026
00027 #include "BCEfficiencyFitter.h"
00028
00029
00030
00031 BCEfficiencyFitter::BCEfficiencyFitter() : BCModel("EfficiencyFitter")
00032 {
00033 fHistogram1 = 0;
00034 fHistogram2 = 0;
00035 fHistogramBinomial = 0;
00036 fFitFunction = 0;
00037
00038 fDataPointType = 1;
00039
00040
00041 MCMCSetNIterationsRun(2000);
00042 MCMCSetRValueCriterion(0.01);
00043 SetFillErrorBand();
00044 fFlagIntegration = false;
00045 }
00046
00047
00048
00049 BCEfficiencyFitter::BCEfficiencyFitter(TH1D * hist1, TH1D * hist2, TF1 * func) : BCModel("EfficiencyFitter")
00050 {
00051 fHistogram1 = 0;
00052 fHistogram2 = 0;
00053 fHistogramBinomial = 0;
00054 fFitFunction = 0;
00055
00056 fDataPointType = 1;
00057
00058 SetHistograms(hist1, hist2);
00059 SetFitFunction(func);
00060
00061 MCMCSetNIterationsRun(2000);
00062 MCMCSetRValueCriterion(0.01);
00063 SetFillErrorBand();
00064 fFlagIntegration = false;
00065 }
00066
00067
00068
00069 int BCEfficiencyFitter::SetHistograms(TH1D * hist1, TH1D * hist2)
00070 {
00071
00072 if (!hist1 || !hist2) {
00073 BCLog::OutError("BCEfficiencyFitter::SetHistograms : TH1D not created.");
00074 return 0;
00075 }
00076
00077
00078 if (hist1->GetNbinsX() != hist2->GetNbinsX()) {
00079 BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histograms do not have the same number of bins.");
00080 return 0;
00081 }
00082
00083
00084 for (int i = 1; i <= hist1->GetNbinsX(); ++i)
00085 if (hist1->GetBinContent(i) < hist2->GetBinContent(i)) {
00086 BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histogram 1 has fewer entries than histogram 2.");
00087 return 0;
00088 }
00089
00090
00091 fHistogram1 = hist1;
00092 fHistogram2 = hist2;
00093
00094
00095
00096
00097 BCDataSet * ds = new BCDataSet();
00098
00099
00100 int nbins = fHistogram1->GetNbinsX();
00101 for (int i = 0; i < nbins; ++i) {
00102 BCDataPoint* dp = new BCDataPoint(2);
00103 ds->AddDataPoint(dp);
00104 }
00105
00106
00107 SetDataSet(ds);
00108
00109
00110 double xmin = hist1->GetBinLowEdge(1);
00111 double xmax = hist1->GetBinLowEdge(nbins+1);
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123 SetDataBoundaries(0, xmin, xmax);
00124 SetDataBoundaries(1, 0.0, 1.0);
00125
00126
00127 SetFitFunctionIndices(0, 1);
00128
00129
00130 return 1;
00131 }
00132
00133
00134
00135 int BCEfficiencyFitter::SetFitFunction(TF1 * func)
00136 {
00137
00138 if(!func) {
00139 BCLog::OutError("BCEfficiencyFitter::SetFitFunction : TF1 not created.");
00140 return 0;
00141 }
00142
00143
00144 fFitFunction = func;
00145
00146
00147 SetName(TString::Format("BCEfficiencyFitter with %s",fFitFunction->GetName()));
00148
00149
00150 fParameterSet->clear();
00151
00152
00153 int n = func->GetNpar();
00154
00155
00156 for (int i = 0; i < n; ++i)
00157 {
00158 double xmin;
00159 double xmax;
00160
00161 func->GetParLimits(i, xmin, xmax);
00162
00163 AddParameter(func->GetParName(i), xmin, xmax);
00164 }
00165
00166
00167 SetPriorConstantAll();
00168
00169 return GetNParameters();;
00170 }
00171
00172
00173
00174 BCEfficiencyFitter::~BCEfficiencyFitter()
00175 {
00176 if (fHistogram1)
00177 delete fHistogram1;
00178
00179 if (fHistogram2)
00180 delete fHistogram2;
00181
00182 if (fHistogramBinomial)
00183 delete fHistogramBinomial;
00184 }
00185
00186
00187
00188 void BCEfficiencyFitter::SetDataPointType(int type)
00189 {
00190 if(type < 0 || type > 2)
00191 BCLog::OutError(Form("BCEfficiencyFitter::SetDataPointType : Unknown data point type %d (should be between 0 and 2)."));
00192 else
00193 fDataPointType = type;
00194 }
00195
00196
00197
00198 double BCEfficiencyFitter::LogLikelihood(std::vector <double> params)
00199 {
00200
00201
00202 double loglikelihood = 0;
00203
00204
00205 fFitFunction->SetParameters(¶ms[0]);
00206
00207
00208 int nbins = fHistogram1->GetNbinsX();
00209
00210
00211 double dx = fHistogram1->GetXaxis()->GetBinWidth(0);
00212
00213
00214 for (int ibin = 1; ibin <= nbins; ++ibin)
00215 {
00216
00217 int n = int(fHistogram1->GetBinContent(ibin));
00218
00219
00220 int k = int(fHistogram2->GetBinContent(ibin));
00221
00222
00223 double x = fHistogram1->GetBinCenter(ibin);
00224
00225 double eff = 0;
00226
00227
00228 if (fFlagIntegration)
00229 eff = fFitFunction->Integral(x - dx/2., x + dx/2.) / dx;
00230
00231
00232 else
00233 eff = (fFitFunction->Eval(x + dx/2.) + fFitFunction->Eval(x - dx/2.)) / 2.;
00234
00235
00236 loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
00237 }
00238
00239 return loglikelihood;
00240 }
00241
00242
00243
00244 double BCEfficiencyFitter::FitFunction(std::vector <double> x, std::vector <double> params)
00245 {
00246
00247 fFitFunction->SetParameters(¶ms[0]);
00248
00249 return fFitFunction->Eval(x[0]);
00250 }
00251
00252
00253
00254 int BCEfficiencyFitter::Fit(TH1D * hist1, TH1D * hist2, TF1 * func)
00255 {
00256
00257 if (hist1 && hist2)
00258 SetHistograms(hist1, hist2);
00259 else {
00260 BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00261 return 0;
00262 }
00263
00264
00265 if (func)
00266 SetFitFunction(func);
00267 else {
00268 BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00269 return 0;
00270 }
00271
00272 return Fit();
00273 }
00274
00275
00276
00277 int BCEfficiencyFitter::Fit()
00278 {
00279
00280 if (!fHistogram1 || !fHistogram2) {
00281 BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
00282 return 0;
00283 }
00284
00285
00286 if (!fFitFunction) {
00287 BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
00288 return 0;
00289 }
00290
00291
00292 MarginalizeAll();
00293
00294
00295
00296 FindModeMinuit( GetBestFitParameters() , -1);
00297
00298
00299 double pvalue;
00300 if ( CalculatePValueFast(GetBestFitParameters(), pvalue) )
00301 fPValue = pvalue;
00302 else
00303 BCLog::OutError("BCEfficiencyFitter::Fit : Could not use the fast p-value evaluation.");
00304
00305
00306 PrintShortFitSummary();
00307
00308
00309 return 1;
00310 }
00311
00312
00313
00314 void BCEfficiencyFitter::DrawFit(const char * options, bool flaglegend)
00315 {
00316 if (!fHistogram1 || !fHistogram2) {
00317 BCLog::OutError("BCEfficiencyFitter::DrawFit : Histogram(s) not defined.");
00318 return;
00319 }
00320
00321 if (!fFitFunction) {
00322 BCLog::OutError("BCEfficiencyFitter::DrawFit : Fit function not defined.");
00323 return;
00324 }
00325
00326
00327 TGraphAsymmErrors * histRatio = new TGraphAsymmErrors();
00328 histRatio->SetMarkerStyle(20);
00329 histRatio->SetMarkerSize(1.5);
00330
00331 int npoints = 0;
00332
00333
00334 for (int i = 1; i <= fHistogram1->GetNbinsX(); ++i)
00335 {
00336 if(int(fHistogram1->GetBinContent(i)) == 0) {
00337 ++npoints;
00338 continue;
00339 }
00340
00341
00342 double xexp, xmin, xmax;
00343 GetUncertainties( int(fHistogram1->GetBinContent(i)), int(fHistogram2->GetBinContent(i)), 0.68, xexp, xmin, xmax);
00344
00345 histRatio->SetPoint( npoints, fHistogram1->GetBinCenter(i), xexp);
00346
00347
00348 histRatio->SetPointEXhigh(npoints, 0.);
00349 histRatio->SetPointEXlow(npoints, 0.);
00350
00351
00352 histRatio->SetPointEYhigh(npoints, xmax - xexp);
00353 histRatio->SetPointEYlow(npoints, xexp - xmin);
00354
00355 ++npoints;
00356 }
00357
00358
00359
00360 TString opt = options;
00361 opt.ToLower();
00362
00363
00364 if(!opt.Contains("same"))
00365 {
00366
00367 TH2D * hist_axes = new TH2D("hist_axes",
00368 Form(";%s;ratio", fHistogram1->GetXaxis()->GetTitle()),
00369 fHistogram1->GetNbinsX(),
00370 fHistogram1->GetXaxis()->GetBinLowEdge(1),
00371 fHistogram1->GetXaxis()->GetBinLowEdge(fHistogram1->GetNbinsX()+1),
00372 1, 0., 1.);
00373 hist_axes->SetStats(kFALSE);
00374 hist_axes->Draw();
00375
00376 histRatio->Draw(TString::Format("%sp",opt.Data()));
00377 }
00378
00379
00380 fErrorBand = GetErrorBandGraph(0.16, 0.84);
00381 fErrorBand->Draw("f same");
00382
00383
00384 histRatio->SetMarkerSize(.7);
00385 histRatio->Draw(TString::Format("%ssamep",opt.Data()));
00386
00387
00388 fGraphFitFunction = GetFitFunctionGraph( GetBestFitParameters() );
00389 fGraphFitFunction->SetLineColor(kRed);
00390 fGraphFitFunction->SetLineWidth(2);
00391 fGraphFitFunction->Draw("l same");
00392
00393
00394 if (flaglegend)
00395 {
00396 TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00397 legend->SetBorderSize(0);
00398 legend->SetFillColor(kWhite);
00399 legend->AddEntry(histRatio, "Data", "L");
00400 legend->AddEntry(fGraphFitFunction, "Best fit", "L");
00401 legend->AddEntry(fErrorBand, "Error band", "F");
00402 legend->Draw();
00403 }
00404 gPad->RedrawAxis();
00405 }
00406
00407
00408 int BCEfficiencyFitter::CalculatePValueFast(std::vector<double> par, double &pvalue)
00409 {
00410
00411 if (par.size() != GetNParameters()) {
00412 BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Number of parameters is inconsistent.");
00413 return 0;
00414 }
00415
00416
00417 if (!fHistogram1 || !fHistogram2) {
00418 BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Histogram not defined.");
00419 return 0;
00420 }
00421
00422
00423 int nbins = fHistogram1->GetNbinsX();
00424
00425 std::vector <int> histogram;
00426 std::vector <double> expectation;
00427 histogram.assign(nbins, 0);
00428 expectation.assign(nbins, 0);
00429
00430 double logp = 0;
00431 double logp_start = 0;
00432 int counter_pvalue = 0;
00433
00434
00435 for (int ibin = 0; ibin < nbins; ++ibin) {
00436
00437 double xmin = fHistogram1->GetBinLowEdge(ibin+1);
00438 double xmax = fHistogram1->GetBinLowEdge(ibin+2);
00439
00440
00441 double yexp = fFitFunction->Integral(xmin, xmax);
00442
00443
00444 int n = int(fHistogram1->GetBinContent(ibin));
00445
00446
00447 int k = int(fHistogram2->GetBinContent(ibin));
00448
00449 histogram[ibin] = k;
00450 expectation[ibin] = n * yexp;
00451
00452
00453 logp += BCMath::LogApproxBinomial(n, k, yexp);
00454 }
00455 logp_start = logp;
00456
00457 int niter = 100000;
00458
00459
00460 for (int iiter = 0; iiter < niter; ++iiter)
00461 {
00462
00463 for (int ibin = 0; ibin < nbins; ++ibin)
00464 {
00465
00466 int n = int(fHistogram1->GetBinContent(ibin));
00467
00468
00469 int k = histogram.at(ibin);
00470
00471
00472 double yexp = 0;
00473 if (n > 0)
00474 yexp = expectation.at(ibin)/n;
00475
00476
00477 double ptest = fRandom->Rndm() - 0.5;
00478
00479
00480 if (!(yexp > 0. || yexp < 1.0))
00481 continue;
00482
00483
00484 if (ptest > 0 && (k < n))
00485 {
00486
00487 double r = (double(n)-double(k))/(double(k)+1.) * yexp / (1. - yexp);
00488
00489
00490 if (fRandom->Rndm() < r)
00491 {
00492 histogram[ibin] = k + 1;
00493 logp += log(r);
00494 }
00495 }
00496
00497
00498 else if (ptest <= 0 && (k > 0))
00499 {
00500
00501 double r = double(k) / (double(n)-(double(k)-1)) * (1-yexp)/yexp;
00502
00503
00504 if (fRandom->Rndm() < r)
00505 {
00506 histogram[ibin] = k - 1;
00507 logp += log(r);
00508 }
00509 }
00510
00511 }
00512
00513
00514 if (logp < logp_start)
00515 counter_pvalue++;
00516
00517 }
00518
00519
00520 pvalue = double(counter_pvalue) / double(niter);
00521
00522
00523 return 1;
00524 }
00525
00526
00527 int BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
00528 {
00529 if (n == 0)
00530 {
00531 xexp = 0.;
00532 xmin = 0.;
00533 xmax = 0.;
00534 return 0;
00535 }
00536
00537 BCLog::OutDebug(Form("Calculating efficiency data-point of type %d for (n,k) = (%d,%d)",fDataPointType,n,k));
00538
00539
00540 if (fHistogramBinomial)
00541 fHistogramBinomial->Reset();
00542 else
00543 fHistogramBinomial = new TH1D("hist_binomial", "", 1000, 0., 1.);
00544
00545
00546 for (int i = 1; i <= 1000; ++i) {
00547 double x = fHistogramBinomial->GetBinCenter(i);
00548 double val = BCMath::ApproxBinomial(n, k, x);
00549 fHistogramBinomial->SetBinContent(i, val);
00550 }
00551
00552
00553 fHistogramBinomial->Scale(1. / fHistogramBinomial->Integral());
00554
00555
00556 if (fDataPointType == 0) {
00557 xexp = fHistogramBinomial->GetMean();
00558 double rms = fHistogramBinomial->GetRMS();
00559 xmin = xexp-rms;
00560 xmax = xexp+rms;
00561 BCLog::OutDebug(Form(" - mean = %f , rms = %f)",xexp,rms));
00562 }
00563 else if (fDataPointType == 1) {
00564 xexp = (double)k/(double)n;
00565 BCH1D * fbh = new BCH1D((TH1D*)fHistogramBinomial->Clone("hcp"));
00566 std::vector<double> intervals = fbh->GetSmallestIntervals(p);
00567 int ninter = intervals.size();
00568 if ( ninter<2 ) {
00569 xmin = xmax = xexp = 0.;
00570 }
00571 else {
00572 xmin = intervals[0];
00573 xmax = intervals[1];
00574 }
00575 delete fbh;
00576 }
00577 else {
00578
00579 int nprobSum = 3;
00580 double q[3];
00581 double probSum[3];
00582 probSum[0] = (1.-p)/2.;
00583 probSum[1] = 1.-(1.-p)/2.;
00584 probSum[2] = .5;
00585
00586 fHistogramBinomial->GetQuantiles(nprobSum, q, probSum);
00587
00588 xexp = q[2];
00589 xmin = q[0];
00590 xmax = q[1];
00591
00592 }
00593
00594 BCLog::OutDebug(Form(" - efficiency = %f , range (%f - %f)",xexp,xmin,xmax));
00595
00596 return 1;
00597 }
00598
00599