BAT  0.9.4
The Bayesian analysis toolkit
 All Classes Namespaces Functions Variables Enumerations
BCHistogramFitter.cxx
1 /*
2  * Copyright (C) 2007-2014, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include "BCHistogramFitter.h"
12 #include "BAT/BCLog.h"
13 #include "BAT/BCDataSet.h"
14 #include "BAT/BCDataPoint.h"
15 #include "BAT/BCMath.h"
16 
17 #include <TH1D.h>
18 #include <TF1.h>
19 #include <TGraph.h>
20 #include <TString.h>
21 #include <TPad.h>
22 #include <TRandom3.h>
23 #include <TLegend.h>
24 #include <TMath.h>
25 #include <Math/ProbFuncMathCore.h> //for ROOT::Math namespace
26 #include <TString.h>
27 
28 // ---------------------------------------------------------
29 
31  : BCFitter()
32  , fHistogram(0)
33  , fFitFunction(0)
34  , fHistogramExpected(0)
35 {
36  // set default options and values
37  MCMCSetNIterationsRun(2000);
39  fFlagIntegration = true;
40  flag_discrete = true;
41 
42  // set MCMC for marginalization
43  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
44 }
45 
46 // ---------------------------------------------------------
47 
49  : BCFitter(name)
50  , fHistogram(0)
51  , fFitFunction(0)
52  , fHistogramExpected(0)
53 {
54  // set default options and values
55  MCMCSetNIterationsRun(2000);
57  fFlagIntegration = true;
58  flag_discrete = true;
59 
60  // set MCMC for marginalization
61  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
62 }
63 
64 // ---------------------------------------------------------
65 
66 BCHistogramFitter::BCHistogramFitter(TH1D * hist, TF1 * func)
67  : BCFitter()
68  , fHistogram(0)
69  , fFitFunction(0)
70  , fHistogramExpected(0)
71 {
72  SetHistogram(hist);
73  SetFitFunction(func);
74 
75  MCMCSetNIterationsRun(2000);
77  fFlagIntegration = true;
78  flag_discrete = true;
79 
80  // set MCMC for marginalization
81  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
82 }
83 
84 // ---------------------------------------------------------
85 
86 BCHistogramFitter::BCHistogramFitter(const char * name, TH1D * hist, TF1 * func)
87  : BCFitter(name)
88  , fHistogram(0)
89  , fFitFunction(0)
90  , fHistogramExpected(0)
91 {
92  SetHistogram(hist);
93  SetFitFunction(func);
94 
95  MCMCSetNIterationsRun(2000);
97  fFlagIntegration = true;
98  flag_discrete = true;
99 
100  // set MCMC for marginalization
101  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
102 }
103 
104 // ---------------------------------------------------------
105 
107 {
108  // check if histogram exists
109  if (!hist) {
110  BCLog::OutError("BCHistogramFitter::SetHistogram : TH1D not created.");
111  return 0;
112  }
113 
114  // set pointer to histogram
115  fHistogram = hist;
116 
117  // create a data set. this is necessary in order to calculate the
118  // error band. the data set contains as many data points as there
119  // are bins.
120  BCDataSet * ds = new BCDataSet();
121 
122  // create data points and add them to the data set.
123  // the x value is the lower edge of the bin, and
124  // the y value is the bin count
125  int nbins = fHistogram->GetNbinsX();
126  for (int i = 0; i < nbins; ++i) {
127  BCDataPoint* dp = new BCDataPoint(2);
128  dp ->SetValue(0, fHistogram->GetBinLowEdge(i + 1));
129  dp ->SetValue(1, fHistogram->GetBinContent(i + 1));
130  ds->AddDataPoint(dp);
131  }
132 
133  // set the new data set.
134  SetDataSet(ds);
135 
136  // calculate the lower and upper edge in x.
137  double xmin = hist->GetBinLowEdge(1);
138  double xmax = hist->GetBinLowEdge(nbins + 1);
139 
140  // calculate the minimum and maximum range in y.
141  double histymin = hist->GetMinimum();
142  double histymax = hist->GetMaximum();
143 
144  // calculate the minimum and maximum of the function value based on
145  // the minimum and maximum value in y.
146  double ymin = TMath::Max(0., histymin - 5. * sqrt(histymin));
147  double ymax = histymax + 5. * sqrt(histymax);
148 
149  // set the data boundaries for x and y values.
150  SetDataBoundaries(0, xmin, xmax);
151  SetDataBoundaries(1, ymin, ymax);
152 
153  // set the indeces for fitting.
154  SetFitFunctionIndices(0, 1);
155 
156  // no error
157  return 1;
158 }
159 
160 // ---------------------------------------------------------
161 
162 int BCHistogramFitter::SetHistogramExpected(const std::vector<double> & parameters)
163 {
164  //clear up previous memory
165  if (fHistogramExpected) {
166  delete fHistogramExpected;
167  }
168  //copy all properties from the data histogram
169  fHistogramExpected = new TH1D(*fHistogram);
170 
171  // get the number of bins
172  int nBins = fHistogramExpected->GetNbinsX();
173 
174  // get bin width
175  double dx = fHistogramExpected->GetBinWidth(1);
176 
177  //set the parameters of fit function
178  fFitFunction->SetParameters(&parameters[0]);
179 
180  // get function value of lower bin edge
181  double fedgelow = fFitFunction->Eval(fHistogramExpected->GetBinLowEdge(1));
182 
183  // loop over all bins, skip underflow
184  for (int ibin = 1; ibin <= nBins; ++ibin) {
185  // get upper bin edge
186  double xedgehi = fHistogramExpected->GetBinLowEdge(ibin) + dx;
187 
188  //expected count
189  double yexp = 0.;
190 
191  // use ROOT's TH1D::Integral method
192  if (fFlagIntegration)
193  yexp = fFitFunction->Integral(xedgehi - dx, xedgehi);
194 
195  // use linear interpolation
196  else {
197  // get function value at upper bin edge
198  double fedgehi = fFitFunction->Eval(xedgehi);
199  yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
200 
201  // make the upper edge the lower edge for the next iteration
202  fedgelow = fedgehi;
203  }
204 
205  //write the expectation for the bin
206  fHistogramExpected->SetBinContent(ibin, yexp);
207 
208  //avoid automatic error as sqrt(yexp), used e.g. in Kolmogorov correction factor
209  fHistogramExpected->SetBinError(ibin, 0.0);
210 
211  // but the data under this model have that sqrt(yexp) uncertainty
212  fHistogram->SetBinError(ibin, sqrt(yexp));
213 
214  }
215  return 1;
216 }
217 
218 // ---------------------------------------------------------
219 
221 {
222  // check if function exists
223  if (!func) {
224  BCLog::OutError("BCHistogramFitter::SetFitFunction : TF1 not created.");
225  return 0;
226  }
227 
228  // set the function
229  fFitFunction = func;
230 
231  // update the model name to contain the function name
232  if(fName=="model")
233  SetName(TString::Format("HistogramFitter with %s", fFitFunction->GetName()));
234 
235  // reset parameters
236  ClearParameters(true);
237 
238  // get the new number of parameters
239  int n = func->GetNpar();
240 
241  // add parameters
242  for (int i = 0; i < n; ++i) {
243  double xmin;
244  double xmax;
245 
246  func->GetParLimits(i, xmin, xmax);
247 
248  AddParameter(func->GetParName(i), xmin, xmax);
249  }
250 
251  // set flat prior for all parameters by default
253 
254  return GetNParameters();
255 }
256 
257 // ---------------------------------------------------------
258 
260 {
261  // todo memory leak, many members not removed
262  delete fHistogramExpected;
263 }
264 
265 // ---------------------------------------------------------
266 
267 double BCHistogramFitter::LogLikelihood(const std::vector<double> & params)
268 {
269  // initialize probability
270  double loglikelihood = 0;
271 
272  // set the parameters of the function
273  fFitFunction->SetParameters(&params[0]);
274 
275  // get the number of bins
276  int nbins = fHistogram->GetNbinsX();
277 
278  // get bin width
279  double dx = fHistogram->GetBinWidth(1);
280 
281  // get function value of lower bin edge
282  double fedgelow = fFitFunction->Eval(fHistogram->GetBinLowEdge(1));
283 
284  // loop over all bins
285  for (int ibin = 1; ibin <= nbins; ++ibin) {
286  // get upper bin edge
287  double xedgehi = fHistogram->GetBinLowEdge(ibin) + dx;
288 
289  // get function value at upper bin edge
290  double fedgehi = fFitFunction->Eval(xedgehi);
291 
292  // get the number of observed events
293  double y = fHistogram->GetBinContent(ibin);
294 
295  double yexp = 0.;
296 
297  // use ROOT's TH1D::Integral method
298  if (fFlagIntegration)
299  yexp = fFitFunction->Integral(xedgehi - dx, xedgehi);
300 
301  // use linear interpolation
302  else {
303  yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
304 
305  // make the upper edge the lower edge for the next iteration
306  fedgelow = fedgehi;
307  }
308 
309  // get the value of the Poisson distribution
310  loglikelihood += BCMath::LogPoisson(y, yexp);
311  }
312 
313  return loglikelihood;
314 }
315 
316 // ---------------------------------------------------------
317 
318 double BCHistogramFitter::FitFunction(const std::vector<double> & x, const std::vector<double> & params)
319 {
320  // set the parameters of the function
321  fFitFunction->SetParameters(&params[0]);
322 
323  return fFitFunction->Eval(x[0]) * fHistogram->GetBinWidth(fHistogram->FindBin(x[0]));
324 }
325 
326 // ---------------------------------------------------------
327 
328 int BCHistogramFitter::Fit(TH1D * hist, TF1 * func)
329 {
330  // set histogram
331  if (hist)
332  SetHistogram(hist);
333  else {
334  BCLog::OutError("BCHistogramFitter::Fit : Histogram not defined.");
335  return 0;
336  }
337 
338  // set function
339  if (func)
340  SetFitFunction(func);
341  else {
342  BCLog::OutError("BCHistogramFitter::Fit : Fit function not defined.");
343  return 0;
344  }
345 
346  return Fit();
347 }
348 
349 // ---------------------------------------------------------
350 
352 {
353  // set histogram
354  if (!fHistogram) {
355  BCLog::OutError("BCHistogramFitter::Fit : Histogram not defined.");
356  return 0;
357  }
358 
359  // set function
360  if (!fFitFunction) {
361  BCLog::OutError("BCHistogramFitter::Fit : Fit function not defined.");
362  return 0;
363  }
364 
365  // perform marginalization
366  MarginalizeAll();
367 
368  // maximize posterior probability, using the best-fit values close
369  // to the global maximum from the MCMC
370  BCIntegrate::BCOptimizationMethod method_temp = GetOptimizationMethod();
371  SetOptimizationMethod(BCIntegrate::kOptMinuit);
372  FindMode( GetBestFitParameters());
373  SetOptimizationMethod(method_temp);
374 
375  // calculate the p-value using the fast MCMC algorithm
376  if( !CalculatePValueFast(GetBestFitParameters()))
377  BCLog::OutWarning(
378  "BCHistogramFitter::Fit : Could not use the fast p-value evaluation.");
379 
380  // print summary to screen
382 
383  // no error
384  return 1;
385 }
386 
387 // ---------------------------------------------------------
388 
389 void BCHistogramFitter::DrawFit(const char * options, bool flaglegend)
390 {
391  if (!fHistogram) {
392  BCLog::OutError("BCHistogramFitter::DrawFit : Histogram not defined.");
393  return;
394  }
395 
396  if (!fFitFunction) {
397  BCLog::OutError("BCHistogramFitter::DrawFit : Fit function not defined.");
398  return;
399  }
400 
401  if (!fErrorBandXY || GetBestFitParameters().empty()) {
402  BCLog::OutError("BCHistogramFitter::DrawFit : Fit not performed yet.");
403  return;
404  }
405 
406  // check wheather options contain "same"
407  TString opt = options;
408  opt.ToLower();
409 
410  // if not same, draw the histogram first to get the axes
411  if (!opt.Contains("same"))
412  fHistogram->Draw(opt.Data());
413 
414  // draw the error band as central 68% probability interval
415  fErrorBand = GetErrorBandGraph(0.16, 0.84);
416  fErrorBand->Draw("f same");
417 
418  // now draw the histogram again since it was covered by the band
419  fHistogram->Draw(TString::Format("%ssame", opt.Data()));
420 
421  // draw the fit function on top
422  fGraphFitFunction = GetFitFunctionGraph(GetBestFitParameters());
423  fGraphFitFunction->SetLineColor(kRed);
424  fGraphFitFunction->SetLineWidth(2);
425  fGraphFitFunction->Draw("l same");
426 
427  // draw legend
428  if (flaglegend) {
429  TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
430  legend->SetBorderSize(0);
431  legend->SetFillColor(kWhite);
432  legend->AddEntry(fHistogram, "Data", "L");
433  legend->AddEntry(fGraphFitFunction, "Best fit", "L");
434  legend->AddEntry(fErrorBand, "Error band", "F");
435  legend->Draw();
436  }
437 
438  gPad->RedrawAxis();
439 }
440 
441 // ---------------------------------------------------------
442 int BCHistogramFitter::CalculatePValueFast(const std::vector<double> & par, unsigned nIterations)
443 {
444  // check size of parameter vector
445  if (par.size() != GetNParameters()) {
446  BCLog::OutError(
447  "BCHistogramFitter::CalculatePValueFast : Number of parameters is inconsistent.");
448  return 0;
449  }
450 
451  // check if histogram exists
452  if (!fHistogram) {
453  BCLog::OutError(
454  "BCHistogramFitter::CalculatePValueFast : Histogram not defined.");
455  return 0;
456  }
457 
458  //update the expected number of events for all bins
460 
461  // copy observed and expected values
462  std::vector<unsigned> observed(fHistogram->GetNbinsX());
463  std::vector<double> expected(fHistogramExpected->GetNbinsX());
464 
465  for (int ibin = 0 ; ibin < fHistogram->GetNbinsX(); ++ibin){
466  observed[ibin] = (unsigned int) fHistogram->GetBinContent(ibin + 1);
467  expected[ibin] = fHistogramExpected->GetBinContent(ibin + 1);
468  }
469 
470  // create pseudo experiments
471  fPValue = BCMath::FastPValue(observed, expected, nIterations, fRandom->GetSeed());
472 
473  // correct for fitting bias
474  fPValueNDoF = BCMath::CorrectPValue(fPValue, GetNParameters(), fHistogram->GetNbinsX());
475 
476  // no error
477  return 1;
478 }
479 
480 // ---------------------------------------------------------
481 int BCHistogramFitter::CalculatePValueLikelihood(const std::vector<double> & par)
482 {
483  // initialize test statistic -2*lambda
484  double logLambda = 0.0;
485 
486  //Calculate expected counts to compare with
488 
489  for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
490 
491  // get the number of observed events
492  double y = fHistogram->GetBinContent(ibin);
493 
494  // get the number of expected events
495  double yexp = fHistogramExpected->GetBinContent(ibin);
496 
497  // get the contribution from this datapoint
498  if (y == 0)
499  logLambda += yexp;
500  else
501  logLambda += yexp - y + y * log(y / yexp);
502  }
503 
504  // rescale
505  logLambda *= 2.0;
506 
507  //p value from chi^2 distribution, returns zero if logLambda < 0
508  fPValue = TMath::Prob(logLambda, GetNDataPoints());
509  fPValueNDoF = TMath::Prob(logLambda, GetNDataPoints() - GetNParameters());
510 
511  // no error
512  return 1;
513 }
514 
515 // ---------------------------------------------------------
516 
517 int BCHistogramFitter::CalculatePValueLeastSquares(const std::vector<double> & par, bool weightExpect)
518 {
519  // initialize test statistic chi^2
520  double chi2 = 0.0;
521 
522  //Calculate expected counts to compare with
524 
525  for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
526 
527  // get the number of observed events
528  double y = fHistogram->GetBinContent(ibin);
529 
530  // get the number of expected events
531  double yexp = fHistogramExpected->GetBinContent(ibin);
532 
533  //convert 1/0.0 into 1 for weighted sum
534  double weight;
535  if (weightExpect)
536  weight = (yexp > 0) ? yexp : 1.0;
537  else
538  weight = (y > 0) ? y : 1.0;
539 
540  // get the contribution from this datapoint
541  chi2 += (y - yexp) * (y - yexp) / weight;
542  }
543 
544  // p value from chi^2 distribution
545  fPValue = TMath::Prob(chi2, GetNDataPoints());
546  fPValueNDoF = TMath::Prob(chi2, GetNDataPoints() - GetNParameters());
547 
548  // no error
549  return 1;
550 }
551 
552 // ---------------------------------------------------------
553 
554 int BCHistogramFitter::CalculatePValueKolmogorov(const std::vector<double> & par)
555 {
556  if (!fHistogramExpected || !fHistogram) {
557  BCLog::OutError("BCHistogramFitter::CalculatePValueKolmogorov: "
558  "Please define the reference distribution by calling \n"
559  "BCHistogramFitter::SetHistogramExpected() first!");
560  return 0;
561  }
562 
563  //update expected counts with current parameters
565 
566  //perform the test
567  fPValue = fHistogramExpected->KolmogorovTest(fHistogram);
568  fPValue = BCMath::CorrectPValue(fPValue, GetNParameters(), GetNDataPoints());
569 
570  // no error
571  return 1;
572 }
573 
574 // ---------------------------------------------------------
575 
576 double BCHistogramFitter::CDF(const std::vector<double>& parameters, int index, bool lower)
577 {
578 
579  if (parameters.empty())
580  BCLog::OutWarning("BCHistogramFitter::CDF: parameter vector empty!");
581  //histogram stores underflow in bin 0, so datapoint 0 is in bin 1
582  index++;
583 
584  // get the number of observed events (should be integer)
585  double yObs = fHistogram->GetBinContent(index);
586 
587  // get function value of lower bin edge
588  double edgeLow = fHistogram->GetBinLowEdge(index);
589  double edgeHigh = edgeLow + fHistogram->GetBinWidth(index);
590 
591  // expectation value of this bin
592  double yExp = 0.0;
593 
594  // use ROOT's TH1D::Integral method
595  if (fFlagIntegration) {
596  TF1 tmpF(*fFitFunction);
597  tmpF.SetParameters(&parameters[0]);
598  yExp = tmpF.Integral(edgeLow, edgeHigh);
599  }
600  // use linear interpolation
601  else {
602  double dx = fHistogram->GetBinWidth(index);
603  double fEdgeLow = fFitFunction->Eval(edgeLow);
604  double fEdgeHigh = fFitFunction->Eval(edgeHigh);
605  yExp = fEdgeLow * dx + (fEdgeHigh - fEdgeLow) * dx / 2.;
606  }
607 
608  // usually Poisson bins do not agree with fixed probability bins
609  // so choose where it should belong
610 
611  if (lower) {
612  if ((int) yObs >= 1)
613  return ROOT::Math::poisson_cdf((unsigned int) (yObs - 1), yExp);
614  else
615  return 0.0;
616  }
617  // what if yObs as double doesn't reprepsent a whole number? exception?
618  if ((double) (unsigned int) yObs != yObs) {
619  BCLog::OutWarning(Form(
620  "BCHistogramFitter::CDF: Histogram values should be integer!\n"
621  " Bin %d = %f", index, yObs));
622 
623  //convert randomly to integer
624  // ex. yObs = 9.785 =>
625  // yObs --> 10 with P = 78.5%
626  // yObs --> 9 with P = 21.5%
627  double U = fRandom->Rndm();
628  double yObsLower = (unsigned int) yObs;
629  if (U > (yObs - yObsLower))
630  yObs = yObsLower;
631  else
632  yObs = yObsLower + 1;
633  }
634 
635  return ROOT::Math::poisson_cdf((unsigned int) yObs, yExp);
636 }
637 
638 // ---------------------------------------------------------
void PrintShortFitSummary(int chi2flag=0)
Definition: BCModel.cxx:1029
virtual double LogLikelihood(const std::vector< double > &parameters)
int SetFitFunction(TF1 *func)
int CalculatePValueLeastSquares(const std::vector< double > &par, bool weightExpect=true)
TH2D * fErrorBandXY
Definition: BCFitter.h:206
A class representing a data point.
Definition: BCDataPoint.h:31
int SetHistogramExpected(const std::vector< double > &parameters)
double FastPValue(const std::vector< unsigned > &observed, const std::vector< double > &expected, unsigned nIterations, unsigned seed)
Definition: BCMath.cxx:690
void SetName(const char *name)
Definition: BCModel.h:145
void SetDataSet(BCDataSet *dataset)
Definition: BCModel.h:178
int SetPriorConstantAll()
Definition: BCModel.cxx:753
A class representing a set of data points.
Definition: BCDataSet.h:37
void SetFitFunctionIndices(int indexx, int indexy)
Definition: BCFitter.h:122
double CorrectPValue(const double &pvalue, const unsigned &npar, const unsigned &nobservations)
Definition: BCMath.cxx:665
virtual int AddParameter(BCParameter *parameter)
Definition: BCModel.cxx:225
std::string fName
Definition: BCModel.h:510
unsigned GetNDataPoints() const
Definition: BCModel.cxx:123
bool flag_discrete
Definition: BCModel.h:543
double FitFunction(const std::vector< double > &x, const std::vector< double > &parameters)
int CalculatePValueKolmogorov(const std::vector< double > &par)
int SetHistogram(TH1D *hist)
void SetFillErrorBand(bool flag=true)
Definition: BCFitter.h:92
double LogPoisson(double x, double par)
Definition: BCMath.cxx:57
double CDF(const std::vector< double > &parameters, int index, bool lower=false)
double fPValue
Definition: BCModel.h:536
void DrawFit(const char *options="HIST", bool flaglegend=false)
int CalculatePValueFast(const std::vector< double > &par, unsigned nIterations=100000)
void AddDataPoint(BCDataPoint *datapoint)
Definition: BCDataSet.cxx:358
A base class for all fitting classes.
Definition: BCFitter.h:27
void SetValue(unsigned index, double value)
Definition: BCDataPoint.cxx:54
int CalculatePValueLikelihood(const std::vector< double > &par)