BAT  0.9.4
The Bayesian analysis toolkit
 All Classes Namespaces Functions Variables Enumerations
BCEfficiencyFitter.cxx
1 /*
2  * Copyright (C) 2007-2014, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include <TH1D.h>
12 #include <TH2D.h>
13 #include <TF1.h>
14 #include <TGraph.h>
15 #include <TGraphAsymmErrors.h>
16 #include <TString.h>
17 #include <TPad.h>
18 #include <TRandom3.h>
19 #include <TLegend.h>
20 #include <TMath.h>
21 
22 #include "BAT/BCLog.h"
23 #include "BAT/BCDataSet.h"
24 #include "BAT/BCDataPoint.h"
25 #include "BAT/BCMath.h"
26 #include "BAT/BCH1D.h"
27 
28 #include "BCEfficiencyFitter.h"
29 
30 // ---------------------------------------------------------
31 
33  : BCFitter()
34  , fHistogram1(0)
35  , fHistogram2(0)
36  , fFitFunction(0)
37  , fHistogramBinomial(0)
38  , fDataPointType(1)
39 {
40  // set default options and values
41  fFlagIntegration = false;
42 
43  // set MCMC for marginalization
44  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
45 }
46 
47 // ---------------------------------------------------------
48 
50  : BCFitter(name)
51  , fHistogram1(0)
52  , fHistogram2(0)
53  , fFitFunction(0)
54  , fHistogramBinomial(0)
55  , fDataPointType(1)
56 {
57  // set default options and values
58  fFlagIntegration = false;
59 
60  // set MCMC for marginalization
61  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
62 }
63 
64 // ---------------------------------------------------------
65 
66 BCEfficiencyFitter::BCEfficiencyFitter(TH1D * hist1, TH1D * hist2, TF1 * func)
67  : BCFitter()
68  , fHistogram1(0)
69  , fHistogram2(0)
70  , fFitFunction(0)
71  , fHistogramBinomial(0)
72  , fDataPointType(1)
73 {
74  SetHistograms(hist1, hist2);
75  SetFitFunction(func);
76 
77  fFlagIntegration = false;
78 
79  // set MCMC for marginalization
80  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
81 }
82 
83 // ---------------------------------------------------------
84 
85 BCEfficiencyFitter::BCEfficiencyFitter(const char * name, TH1D * hist1, TH1D * hist2, TF1 * func)
86  : BCFitter(name)
87  , fHistogram1(0)
88  , fHistogram2(0)
89  , fFitFunction(0)
90  , fHistogramBinomial(0)
91  , fDataPointType(1)
92 {
93  SetHistograms(hist1, hist2);
94  SetFitFunction(func);
95 
96  MCMCSetRValueCriterion(0.01);
97  fFlagIntegration = false;
98 
99  // set MCMC for marginalization
100  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
101 }
102 
103 // ---------------------------------------------------------
104 
105 int BCEfficiencyFitter::SetHistograms(TH1D * hist1, TH1D * hist2)
106 {
107  // check if histogram exists
108  if (!hist1 || !hist2) {
109  BCLog::OutError("BCEfficiencyFitter::SetHistograms : TH1D not created.");
110  return 0;
111  }
112 
113  // check compatibility of both histograms : number of bins
114  if (hist1->GetNbinsX() != hist2->GetNbinsX()) {
115  BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histograms do not have the same number of bins.");
116  return 0;
117  }
118 
119  // check compatibility of both histograms : bin content
120  for (int i = 1; i <= hist1->GetNbinsX(); ++i)
121  if (hist1->GetBinContent(i) < hist2->GetBinContent(i)) {
122  BCLog::OutError("BCEfficiencyFitter::SetHistograms : Histogram 1 has fewer entries than histogram 2.");
123  return 0;
124  }
125 
126  // set pointer to histograms
127  fHistogram1 = hist1;
128  fHistogram2 = hist2;
129 
130  // create a data set. this is necessary in order to calculate the
131  // error band. the data set contains as many data points as there
132  // are bins. for now, the data points are empty.
133  BCDataSet * ds = new BCDataSet();
134 
135  // create data points and add them to the data set.
136  int nbins = fHistogram1->GetNbinsX();
137  for (int i = 0; i < nbins; ++i) {
138  BCDataPoint* dp = new BCDataPoint(2);
139  ds->AddDataPoint(dp);
140  }
141 
142  // set the new data set.
143  SetDataSet(ds);
144 
145  // calculate the lower and upper edge in x.
146  double xmin = hist1->GetBinLowEdge(1);
147  double xmax = hist1->GetBinLowEdge(nbins+1);
148 
149 // // calculate the minimum and maximum range in y.
150 // double histymin = hist2->GetMinimum();
151 // double histymax = hist1->GetMaximum();
152 
153 // // calculate the minimum and maximum of the function value based on
154 // // the minimum and maximum value in y.
155 // double ymin = TMath::Max(0., histymin - 5.*sqrt(histymin));
156 // double ymax = histymax + 5.*sqrt(histymax);
157 
158  // set the data boundaries for x and y values.
159  SetDataBoundaries(0, xmin, xmax);
160  SetDataBoundaries(1, 0.0, 1.0);
161 
162  // set the indeces for fitting.
163  SetFitFunctionIndices(0, 1);
164 
165  // no error
166  return 1;
167 }
168 
169 // ---------------------------------------------------------
170 
172 {
173  // check if function exists
174  if(!func) {
175  BCLog::OutError("BCEfficiencyFitter::SetFitFunction : TF1 not created.");
176  return 0;
177  }
178 
179  // set the function
180  fFitFunction = func;
181 
182  // update the model name to contain the function name
183  if(fName=="model")
184  SetName(TString::Format("BCEfficiencyFitter with %s",fFitFunction->GetName()));
185 
186  // reset parameters
187  ClearParameters(true);
188 
189  // get the new number of parameters
190  int n = func->GetNpar();
191 
192  // add parameters
193  for (int i = 0; i < n; ++i)
194  {
195  double xmin;
196  double xmax;
197 
198  func->GetParLimits(i, xmin, xmax);
199 
200  AddParameter(func->GetParName(i), xmin, xmax);
201  }
202 
203  // set flat prior for all parameters by default
205 
206  return GetNParameters();
207 }
208 
209 // ---------------------------------------------------------
210 
212 {
213  delete fHistogram1;
214  delete fHistogram2;
215  delete fHistogramBinomial;
216 }
217 
218 // ---------------------------------------------------------
219 
221 {
222  if(type < 0 || type > 2)
223  BCLog::OutError(Form("BCEfficiencyFitter::SetDataPointType : Unknown data point type %d (should be between 0 and 2).",type));
224  else
225  fDataPointType = type;
226 }
227 
228 // ---------------------------------------------------------
229 
230 double BCEfficiencyFitter::LogLikelihood(const std::vector<double> & params)
231 {
232 
233  // initialize probability
234  double loglikelihood = 0;
235 
236  // set the parameters of the function
237  fFitFunction->SetParameters(&params[0]);
238 
239  // get the number of bins
240  int nbins = fHistogram1->GetNbinsX();
241 
242  // get bin width
243  double dx = fHistogram1->GetXaxis()->GetBinWidth(0);
244 
245  // loop over all bins
246  for (int ibin = 1; ibin <= nbins; ++ibin)
247  {
248  // get n
249  int n = int(fHistogram1->GetBinContent(ibin));
250 
251  // get k
252  int k = int(fHistogram2->GetBinContent(ibin));
253 
254  // get x
255  double x = fHistogram1->GetBinCenter(ibin);
256 
257  double eff = 0;
258 
259  // use ROOT's TH1D::Integral method
260  if (fFlagIntegration)
261  eff = fFitFunction->Integral(x - dx/2., x + dx/2.) / dx;
262 
263  // use linear interpolation
264  else
265  eff = (fFitFunction->Eval(x + dx/2.) + fFitFunction->Eval(x - dx/2.)) / 2.;
266 
267  // get the value of the Poisson distribution
268  loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
269  }
270 
271  return loglikelihood;
272 }
273 
274 // ---------------------------------------------------------
275 
276 double BCEfficiencyFitter::FitFunction(const std::vector<double> & x, const std::vector<double> & params)
277 {
278  // set the parameters of the function
279  fFitFunction->SetParameters(&params[0]);
280 
281  return fFitFunction->Eval(x[0]);
282 }
283 
284 // ---------------------------------------------------------
285 
286 int BCEfficiencyFitter::Fit(TH1D * hist1, TH1D * hist2, TF1 * func)
287 {
288  // set histogram
289  if (hist1 && hist2)
290  SetHistograms(hist1, hist2);
291  else {
292  BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
293  return 0;
294  }
295 
296  // set function
297  if (func)
298  SetFitFunction(func);
299  else {
300  BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
301  return 0;
302  }
303 
304  return Fit();
305 }
306 
307 // ---------------------------------------------------------
308 
310 {
311  // set histogram
312  if (!fHistogram1 || !fHistogram2) {
313  BCLog::OutError("BCEfficiencyFitter::Fit : Histogram(s) not defined.");
314  return 0;
315  }
316 
317  // set function
318  if (!fFitFunction) {
319  BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
320  return 0;
321  }
322 
323  // perform marginalization
324  MarginalizeAll();
325 
326  // maximize posterior probability, using the best-fit values close
327  // to the global maximum from the MCMC
328  BCIntegrate::BCOptimizationMethod method_temp = GetOptimizationMethod();
329  SetOptimizationMethod(BCIntegrate::kOptMinuit);
330  FindMode( GetBestFitParameters());
331  SetOptimizationMethod(method_temp);
332 
333  // calculate the p-value using the fast MCMC algorithm
334  double pvalue, pvalueCorrected;
335  if ( CalculatePValueFast(GetBestFitParameters(), pvalue, pvalueCorrected) )
336  fPValue = pvalue;
337  else
338  BCLog::OutError("BCEfficiencyFitter::Fit : Could not use the fast p-value evaluation.");
339 
340  // print summary to screen
342 
343  // no error
344  return 1;
345 }
346 
347 // ---------------------------------------------------------
348 
349 void BCEfficiencyFitter::DrawFit(const char * options, bool flaglegend)
350 {
351  if (!fHistogram1 || !fHistogram2) {
352  BCLog::OutError("BCEfficiencyFitter::DrawFit : Histogram(s) not defined.");
353  return;
354  }
355 
356  if (!fFitFunction) {
357  BCLog::OutError("BCEfficiencyFitter::DrawFit : Fit function not defined.");
358  return;
359  }
360 
361  // create efficiency graph
362  TGraphAsymmErrors * histRatio = new TGraphAsymmErrors();
363  histRatio->SetMarkerStyle(20);
364  histRatio->SetMarkerSize(1.5);
365 
366  int npoints = 0;
367 
368  // set points
369  for (int i = 1; i <= fHistogram1->GetNbinsX(); ++i)
370  {
371  if(int(fHistogram1->GetBinContent(i)) == 0) {
372  ++npoints;
373  continue;
374  }
375 
376  // calculate central value and uncertainties
377  double xexp, xmin, xmax;
378  GetUncertainties( int(fHistogram1->GetBinContent(i)), int(fHistogram2->GetBinContent(i)), 0.68, xexp, xmin, xmax);
379 
380  histRatio->SetPoint( npoints, fHistogram1->GetBinCenter(i), xexp);
381 
382  // no X uncertainties
383  histRatio->SetPointEXhigh(npoints, 0.);
384  histRatio->SetPointEXlow(npoints, 0.);
385 
386  // set Y uncertainties
387  histRatio->SetPointEYhigh(npoints, xmax - xexp);
388  histRatio->SetPointEYlow(npoints, xexp - xmin);
389 
390  ++npoints;
391  }
392 
393 
394  // check wheather options contain "same"
395  TString opt = options;
396  opt.ToLower();
397 
398  // if not same, draw the histogram first to get the axes
399  if(!opt.Contains("same"))
400  {
401  // create new histogram
402  TH2D * hist_axes = new TH2D("hist_axes",
403  Form(";%s;ratio", fHistogram1->GetXaxis()->GetTitle()),
404  fHistogram1->GetNbinsX(),
405  fHistogram1->GetXaxis()->GetBinLowEdge(1),
406  fHistogram1->GetXaxis()->GetBinLowEdge(fHistogram1->GetNbinsX()+1),
407  1, 0., 1.);
408  hist_axes->SetStats(kFALSE);
409  hist_axes->Draw();
410 
411  histRatio->Draw(TString::Format("%sp",opt.Data()));
412  }
413 
414  // draw the error band as central 68% probability interval
415  fErrorBand = GetErrorBandGraph(0.16, 0.84);
416  fErrorBand->Draw("f same");
417 
418  // now draw the histogram again since it was covered by the band
419  histRatio->SetMarkerSize(.7);
420  histRatio->Draw(TString::Format("%ssamep",opt.Data()));
421 
422  // draw the fit function on top
423  fGraphFitFunction = GetFitFunctionGraph( GetBestFitParameters() );
424  fGraphFitFunction->SetLineColor(kRed);
425  fGraphFitFunction->SetLineWidth(2);
426  fGraphFitFunction->Draw("l same");
427 
428  // draw legend
429  if (flaglegend)
430  {
431  TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
432  legend->SetBorderSize(0);
433  legend->SetFillColor(kWhite);
434  legend->AddEntry(histRatio, "Data", "L");
435  legend->AddEntry(fGraphFitFunction, "Best fit", "L");
436  legend->AddEntry(fErrorBand, "Error band", "F");
437  legend->Draw();
438  }
439  gPad->RedrawAxis();
440 }
441 
442 // ---------------------------------------------------------
443 int BCEfficiencyFitter::CalculatePValueFast(const std::vector<double> & par, double &pvalue,
444  double & pvalueCorrected, unsigned nIterations)
445 {
446  //use NULL pointer for no callback
447  return CalculatePValueFast(par, NULL, pvalue, pvalueCorrected, nIterations);
448 }
449 
450 // ---------------------------------------------------------
451 int BCEfficiencyFitter::CalculatePValueFast(const std::vector<double> & par,
452  BCEfficiencyFitter::ToyDataInterface * callback, double &pvalue,
453  double & pvalueCorrected, unsigned nIterations)
454 {
455  // check size of parameter vector
456  if (par.size() != GetNParameters()) {
457  BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Number of parameters is inconsistent.");
458  return 0;
459  }
460 
461  // check if histogram exists
462  if (!fHistogram1 || !fHistogram2) {
463  BCLog::OutError("BCEfficiencyFitter::CalculatePValueFast : Histogram not defined.");
464  return 0;
465  }
466 
467  // define temporary variables
468  int nbins = fHistogram1->GetNbinsX();
469 
470  std::vector<unsigned> histogram(nbins, 0);
471  std::vector<double> expectation(nbins, 0);
472 
473  double logp = 0;
474  double logp_start = 0;
475  int counter_pvalue = 0;
476 
477  // define starting distribution
478  for (int ibin = 0; ibin < nbins; ++ibin) {
479  // get bin boundaries
480  double xmin = fHistogram1->GetBinLowEdge(ibin+1);
481  double xmax = fHistogram1->GetBinLowEdge(ibin+2);
482 
483  // get the number of expected events
484  double yexp = fFitFunction->Integral(xmin, xmax);
485 
486  // get n
487  int n = int(fHistogram1->GetBinContent(ibin));
488 
489  // get k
490  int k = int(fHistogram2->GetBinContent(ibin));
491 
492  histogram[ibin] = k;
493  expectation[ibin] = n * yexp;
494 
495  // calculate p;
496  logp += BCMath::LogApproxBinomial(n, k, yexp);
497  }
498  logp_start = logp;
499 
500  // loop over iterations
501  for (unsigned iiter = 0; iiter < nIterations; ++iiter)
502  {
503  // loop over bins
504  for (int ibin = 0; ibin < nbins; ++ibin)
505  {
506  // get n
507  int n = int(fHistogram1->GetBinContent(ibin));
508 
509  // get k
510  int k = histogram.at(ibin);
511 
512  // get expectation
513  double yexp = 0;
514  if (n > 0)
515  yexp = expectation.at(ibin)/n;
516 
517  // random step up or down in statistics for this bin
518  double ptest = fRandom->Rndm() - 0.5;
519 
520  // continue, if efficiency is at limit
521  if (!(yexp > 0. || yexp < 1.0))
522  continue;
523 
524  // increase statistics by 1
525  if (ptest > 0 && (k < n))
526  {
527  // calculate factor of probability
528  double r = (double(n)-double(k))/(double(k)+1.) * yexp / (1. - yexp);
529 
530  // walk, or don't (this is the Metropolis part)
531  if (fRandom->Rndm() < r)
532  {
533  histogram[ibin] = k + 1;
534  logp += log(r);
535  }
536  }
537 
538  // decrease statistics by 1
539  else if (ptest <= 0 && (k > 0))
540  {
541  // calculate factor of probability
542  double r = double(k) / (double(n)-(double(k)-1)) * (1-yexp)/yexp;
543 
544  // walk, or don't (this is the Metropolis part)
545  if (fRandom->Rndm() < r)
546  {
547  histogram[ibin] = k - 1;
548  logp += log(r);
549  }
550  }
551 
552  } // end of looping over bins
553 
554  //one new toy data set is created
555  //call user interface to calculate arbitrary statistic's distribution
556  if (callback)
557  (*callback)(expectation, histogram);
558 
559  // increase counter
560  if (logp < logp_start)
561  counter_pvalue++;
562 
563  } // end of looping over iterations
564 
565  // calculate p-value
566  pvalue = double(counter_pvalue) / double(nIterations);
567 
568  // correct for fit bias
569  pvalueCorrected = BCMath::CorrectPValue(pvalue, GetNParameters(), nbins);
570 
571  // no error
572  return 1;
573 }
574 
575 // ---------------------------------------------------------
576 int BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
577 {
578  if (n == 0)
579  {
580  xexp = 0.;
581  xmin = 0.;
582  xmax = 0.;
583  return 0;
584  }
585 
586  BCLog::OutDebug(Form("Calculating efficiency data-point of type %d for (n,k) = (%d,%d)",fDataPointType,n,k));
587 
588  // create a histogram with binomial distribution
589  if (fHistogramBinomial)
590  fHistogramBinomial->Reset();
591  else
592  fHistogramBinomial = new TH1D("hist_binomial", "", 1000, 0., 1.);
593 
594  // loop over bins and fill histogram
595  for (int i = 1; i <= 1000; ++i) {
596  double x = fHistogramBinomial->GetBinCenter(i);
597  double val = BCMath::ApproxBinomial(n, k, x);
598  fHistogramBinomial->SetBinContent(i, val);
599  }
600 
601  // normalize
602  fHistogramBinomial->Scale(1. / fHistogramBinomial->Integral());
603 
604  // calculate central value and uncertainties
605  if (fDataPointType == 0) {
606  xexp = fHistogramBinomial->GetMean();
607  double rms = fHistogramBinomial->GetRMS();
608  xmin = xexp-rms;
609  xmax = xexp+rms;
610  BCLog::OutDebug(Form(" - mean = %f , rms = %f)",xexp,rms));
611  }
612  else if (fDataPointType == 1) {
613  xexp = (double)k/(double)n;
614  BCH1D * fbh = new BCH1D((TH1D*)fHistogramBinomial->Clone("hcp"));
615  std::vector<double> intervals = fbh->GetSmallestIntervals(p);
616  int ninter = intervals.size();
617  if ( ninter<2 ) {
618  xmin = xmax = xexp = 0.;
619  }
620  else {
621  xmin = intervals[0];
622  xmax = intervals[1];
623  }
624  delete fbh;
625  }
626  else {
627  // calculate quantiles
628  int nprobSum = 3;
629  double q[3];
630  double probSum[3];
631  probSum[0] = (1.-p)/2.;
632  probSum[1] = 1.-(1.-p)/2.;
633  probSum[2] = .5;
634 
635  fHistogramBinomial->GetQuantiles(nprobSum, q, probSum);
636 
637  xexp = q[2];
638  xmin = q[0];
639  xmax = q[1];
640 
641  }
642 
643  BCLog::OutDebug(Form(" - efficiency = %f , range (%f - %f)",xexp,xmin,xmax));
644 
645  return 1;
646 }
647 
648 // ---------------------------------------------------------
void PrintShortFitSummary(int chi2flag=0)
Definition: BCModel.cxx:1029
A class representing a data point.
Definition: BCDataPoint.h:31
double FitFunction(const std::vector< double > &x, const std::vector< double > &parameters)
void SetName(const char *name)
Definition: BCModel.h:145
void SetDataSet(BCDataSet *dataset)
Definition: BCModel.h:178
int SetPriorConstantAll()
Definition: BCModel.cxx:753
int GetUncertainties(int n, int k, double p, double &xexp, double &xmin, double &xmax)
A class representing a set of data points.
Definition: BCDataSet.h:37
void SetFitFunctionIndices(int indexx, int indexy)
Definition: BCFitter.h:122
double CorrectPValue(const double &pvalue, const unsigned &npar, const unsigned &nobservations)
Definition: BCMath.cxx:665
virtual int AddParameter(BCParameter *parameter)
Definition: BCModel.cxx:225
std::string fName
Definition: BCModel.h:510
void DrawFit(const char *options="", bool flaglegend=false)
A class for handling 1D distributions.
Definition: BCH1D.h:31
std::vector< double > GetSmallestIntervals(double content=0.68)
Definition: BCH1D.cxx:1008
int CalculatePValueFast(const std::vector< double > &par, BCEfficiencyFitter::ToyDataInterface *callback, double &pvalue, double &pvalueCorrected, unsigned nIterations=100000)
double ApproxBinomial(int n, int k, double p)
Definition: BCMath.cxx:73
int SetFitFunction(TF1 *func)
double LogApproxBinomial(int n, int k, double p)
Definition: BCMath.cxx:80
double fPValue
Definition: BCModel.h:536
void SetDataPointType(int type)
void AddDataPoint(BCDataPoint *datapoint)
Definition: BCDataSet.cxx:358
int SetHistograms(TH1D *hist1, TH1D *hist2)
A base class for all fitting classes.
Definition: BCFitter.h:27
virtual double LogLikelihood(const std::vector< double > &parameters)