BAT  0.9.4
The Bayesian analysis toolkit
 All Classes Namespaces Functions Variables Enumerations
BCGraphFitter.cxx
1 /*
2  * Copyright (C) 2007-2014, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include <TGraphErrors.h>
12 #include <TF1.h>
13 #include <TString.h>
14 #include <TPad.h>
15 #include <TLegend.h>
16 #include <Math/ProbFuncMathCore.h>
17 
18 #include "../../BAT/BCLog.h"
19 #include "../../BAT/BCDataSet.h"
20 #include "../../BAT/BCDataPoint.h"
21 #include "../../BAT/BCMath.h"
22 
23 #include "BCGraphFitter.h"
24 
25 // ---------------------------------------------------------
26 
28  : BCFitter()
29  , fGraph(0)
30  , fFitFunction(0)
31  , fErrorBand(0)
32  , fGraphFitFunction(0)
33 {
34  // set MCMC for marginalization
35  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
36 }
37 
38 // ---------------------------------------------------------
39 
40 BCGraphFitter::BCGraphFitter(const char * name)
41  : BCFitter(name)
42  , fGraph(0)
43  , fFitFunction(0)
44  , fErrorBand(0)
45  , fGraphFitFunction(0)
46 {
47  // set MCMC for marginalization
48  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
49 }
50 
51 // ---------------------------------------------------------
52 
53 BCGraphFitter::BCGraphFitter(TGraphErrors * graph, TF1 * func)
54  : BCFitter()
55  , fGraph(0)
56  , fFitFunction(0)
57  , fErrorBand(0)
58  , fGraphFitFunction(0)
59 {
60  SetGraph(graph);
61  SetFitFunction(func);
62 
63  // set MCMC for marginalization
64  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
65 }
66 
67 // ---------------------------------------------------------
68 
69 BCGraphFitter::BCGraphFitter(const char * name, TGraphErrors * graph, TF1 * func)
70  : BCFitter(name)
71  , fGraph(0)
72  , fFitFunction(0)
73  , fErrorBand(0)
74  , fGraphFitFunction(0)
75 {
76  SetGraph(graph);
77  SetFitFunction(func);
78 
79  // set MCMC for marginalization
80  SetMarginalizationMethod(BCIntegrate::kMargMetropolis);
81 }
82 
83 // ---------------------------------------------------------
84 
85 int BCGraphFitter::SetGraph(TGraphErrors * graph)
86 {
87  if(!graph) {
88  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetGraph() : TGraphErrors not created.");
89  return 0;
90  }
91 
92  int npoints = graph->GetN();
93  if(!npoints)
94  {
95  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetGraph() : TGraphErrors is empty.");
96  return 0;
97  }
98  else if(npoints==1)
99  {
100  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetGraph() : TGraphErrors has only one point. Not able to fit.");
101  return 0;
102  }
103 
104  fGraph = graph;
105 
106  double * x = fGraph->GetX();
107  double * y = fGraph->GetY();
108  double * ex = fGraph->GetEX();
109  double * ey = fGraph->GetEY();
110 
111  if(!ey)
112  {
113  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetGraph() : TGraphErrors has NO errors set on Y. Not able to fit.");
114  return 0;
115  }
116 
117  BCDataSet * ds = new BCDataSet();
118 
119  // fill the dataset
120  // find x and y boundaries for the error band calculation
121  double xmin=x[0];
122  double xmax=x[0];
123  double ymin=y[0];
124  double ymax=y[0];
125  for (int i = 0; i < npoints; ++i)
126  {
127  // if x errors are not set, set them to zero
128  double errx = ex ? ex[i] : 0.;
129 
130  // create the data point
131  BCDataPoint * dp = new BCDataPoint(4);
132  dp->SetValue(0, x[i]);
133  dp->SetValue(1, y[i]);
134  dp->SetValue(2, errx);
135  dp->SetValue(3, ey[i]);
136  ds->AddDataPoint(dp);
137 
138  if(x[i]-errx < xmin)
139  xmin = x[i]-errx;
140  else if(x[i]+errx > xmax)
141  xmax = x[i]+errx;
142 
143  if(y[i] - 5.*ey[i] < ymin)
144  ymin = y[i] - 5.*ey[i];
145  else if(y[i] + 5.*ey[i] > ymax)
146  ymax = y[i] + 5.*ey[i];
147  }
148 
149  SetDataSet(ds);
150 
151  // set boundaries for the error band calculation
152  SetDataBoundaries(0, xmin, xmax);
153  SetDataBoundaries(1, ymin, ymax);
154 
155  SetFitFunctionIndices(0, 1);
156 
157  return GetNDataPoints();
158 }
159 
160 // ---------------------------------------------------------
161 
163 {
164  if(!func)
165  {
166  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetFitFunction() : TF1 not created.");
167  return 0;
168  }
169 
170  // get the new number of parameters
171  int npar = func->GetNpar();
172  if(!npar)
173  {
174  BCLog::Out(BCLog::error,BCLog::error,"BCGraphFitter::SetFitFunction() : TF1 has zero parameters. Not able to fit.");
175  return 0;
176  }
177 
178  // set the function
179  fFitFunction = func;
180 
181  // update the model name to contain the function name
182  if(fName=="model")
183  SetName(TString::Format("GraphFitter with %s",fFitFunction->GetName()));
184 
185  // reset parameters
186  ClearParameters(true);
187 
188  // add parameters
189  for (int i = 0; i < npar; ++i)
190  {
191  double xmin;
192  double xmax;
193  fFitFunction->GetParLimits(i, xmin, xmax);
194 
195  AddParameter(fFitFunction->GetParName(i), xmin, xmax);
196  }
197 
198  // set flat prior for all parameters by default
200 
201  return GetNParameters();
202 }
203 
204 // ---------------------------------------------------------
205 
207 {
208  // todo memory leak
209 }
210 
211 // ---------------------------------------------------------
212 
213 double BCGraphFitter::LogLikelihood(const std::vector<double> & params)
214 {
215  // initialize probability
216  double logl = 0.;
217 
218  // set the parameters of the function
219  // passing the pointer to first element of the vector is
220  // not completely safe as there might be an implementation where
221  // the vector elements are not stored consecutively in memory.
222  // however it is much faster than copying the contents, especially
223  // for large number of parameters
224  fFitFunction->SetParameters(&params[0]);
225 
226  // loop over all data points
227  for (unsigned i = 0; i < GetNDataPoints(); i++)
228  {
229  std::vector<double> x = GetDataPoint(i)->GetValues();
230 
231  // her we ignore the errors on x even when they're available
232  // i.e. we treat them just as the region specifiers
233  double y = x[1];
234  double yerr = x[3];
235  double yexp = fFitFunction->Eval(x[0]);
236 
237  // calculate log of probability assuming
238  // a Gaussian distribution for each point
239  logl += BCMath::LogGaus(y, yexp, yerr, true);
240  }
241 
242  return logl;
243 }
244 
245 // ---------------------------------------------------------
246 
247 double BCGraphFitter::FitFunction(const std::vector<double> & x, const std::vector<double> & params)
248 {
249  // set the parameters of the function
250  // passing the pointer to first element of the vector is
251  // not completely safe as there might be an implementation where
252  // the vector elements are not stored consecutively in memory.
253  // however it is much faster than copying the contents, especially
254  // for large number of parameters
255  fFitFunction->SetParameters(&params[0]);
256 
257  return fFitFunction->Eval(x[0]);
258 }
259 
260 // ---------------------------------------------------------
261 
262 int BCGraphFitter::Fit(TGraphErrors * graph, TF1 * func)
263 {
264  // set graph
265  if (!SetGraph(graph)) {
266  BCLog::OutError("BCEfficiencyFitter::Fit : Graph not defined.");
267  return 0;
268  }
269 
270  // set function
271  if (!SetFitFunction(func)) {
272  return 0;
273  BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
274  }
275 
276  return Fit();
277 }
278 
279 // ---------------------------------------------------------
280 
282 {
283  // set graph
284  if (!fGraph) {
285  BCLog::OutError("BCEfficiencyFitter::Fit : Graph not defined.");
286  return 0;
287  }
288 
289  // set function
290  if (!fFitFunction) {
291  BCLog::OutError("BCEfficiencyFitter::Fit : Fit function not defined.");
292  return 0;
293  }
294 
295  // check setup
296  BCLog::Out(BCLog::detail,BCLog::detail,
297  Form("Fitting %d data points with function of %d parameters",GetNDataPoints(),GetNParameters()));
298  if(GetNDataPoints() <= GetNParameters())
299  {
300  BCLog::Out(BCLog::warning,BCLog::warning,
301  Form("Number of parameters (%d) lower than or equal to number of points (%d)."
302  , GetNParameters(), GetNDataPoints()));
303  BCLog::Out(BCLog::warning,BCLog::warning,"Fit doesn't have much meaning.");
304  }
305 
306  // perform marginalization
307  MarginalizeAll();
308 
309  // maximize posterior probability, using the best-fit values close
310  // to the global maximum from the MCMC
311  BCIntegrate::BCOptimizationMethod method_temp = GetOptimizationMethod();
312  SetOptimizationMethod(BCIntegrate::kOptMinuit);
313  FindMode( GetBestFitParameters());
314  SetOptimizationMethod(method_temp);
315 
316  // calculate p-value from the chi2 probability
317  // this is only valid for a product of gaussiang which is the case for
318  // the BCGraphFitter
319  GetPvalueFromChi2(GetBestFitParameters(), 3);
320  GetPvalueFromChi2NDoF(GetBestFitParameters(), 3);
321 
322  // print summary to screen
324 
325  return 1;
326 }
327 
328 // ---------------------------------------------------------
329 
330 void BCGraphFitter::DrawFit(const char * options, bool flaglegend)
331 {
332  if (!fGraph)
333  {
334  BCLog::Out(BCLog::error, BCLog::error,"BCGraphFitter::DrawFit() : TGraphErrors not defined.");
335  return;
336  }
337 
338  if (!fFitFunction)
339  {
340  BCLog::Out(BCLog::error, BCLog::error,"BCGraphFitter::DrawFit() : Fit function not defined.");
341  return;
342  }
343 
344  // check wheather options contain "same"
345  TString opt = options;
346  opt.ToLower();
347 
348  // if not same, draw the histogram first to get the axes
349  if(!opt.Contains("same"))
350  fGraph->Draw("ap");
351 
352  // draw the error band as central 68% probability interval
353  fErrorBand = GetErrorBandGraph(0.16, 0.84);
354  fErrorBand->Draw("f same");
355 
356  // draw the fit function on top
357  fGraphFitFunction = GetFitFunctionGraph( GetBestFitParameters() );
358  fGraphFitFunction->SetLineColor(kRed);
359  fGraphFitFunction->SetLineWidth(2);
360  fGraphFitFunction->Draw("l same");
361 
362  // now draw the histogram again since it was covered by the band and
363  // the best fit
364  fGraph->Draw("p same");
365 
366  // draw legend
367  if (flaglegend)
368  {
369  TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
370  legend->SetBorderSize(0);
371  legend->SetFillColor(kWhite);
372  legend->AddEntry(fGraph, "Data", "P");
373  legend->AddEntry(fGraphFitFunction, "Best fit", "L");
374  legend->AddEntry(fErrorBand, "Error band", "F");
375  legend->Draw();
376  }
377 
378  gPad->RedrawAxis();
379 }
380 
381 // ---------------------------------------------------------
382 
383 double BCGraphFitter::CDF(const std::vector<double> & parameters, int index, bool /*lower*/) {
384 
385  //format: x y error_x error_y
386  std::vector<double> values = fDataSet->GetDataPoint(index)->GetValues();
387 
388  if (values.at(2))
389  BCLog::OutWarning("BCGraphFitter::CDF: Non-zero errors in x-direction are ignored!");
390 
391  // get the observed value
392  double yObs = values.at(1);
393 
394  // expectation value
395  double yExp = FitFunction(values, parameters);
396 
397  return ROOT::Math::normal_cdf(yObs, values.at(3), yExp);
398 }
399 
400 // ---------------------------------------------------------
void PrintShortFitSummary(int chi2flag=0)
Definition: BCModel.cxx:1029
A class representing a data point.
Definition: BCDataPoint.h:31
void SetName(const char *name)
Definition: BCModel.h:145
void SetDataSet(BCDataSet *dataset)
Definition: BCModel.h:178
int SetPriorConstantAll()
Definition: BCModel.cxx:753
const std::vector< double > & GetValues() const
Definition: BCDataPoint.h:60
A class representing a set of data points.
Definition: BCDataSet.h:37
void SetFitFunctionIndices(int indexx, int indexy)
Definition: BCFitter.h:122
double GetPvalueFromChi2(const std::vector< double > &par, int sigma_index)
Definition: BCModel.cxx:351
int SetFitFunction(TF1 *func)
BCDataPoint * GetDataPoint(unsigned int index) const
Definition: BCModel.cxx:132
virtual int AddParameter(BCParameter *parameter)
Definition: BCModel.cxx:225
std::string fName
Definition: BCModel.h:510
unsigned GetNDataPoints() const
Definition: BCModel.cxx:123
double FitFunction(const std::vector< double > &x, const std::vector< double > &parameters)
static void Out(BCLog::LogLevel loglevelfile, BCLog::LogLevel loglevelscreen, const char *message)
Definition: BCLog.cxx:100
double LogGaus(double x, double mean, double sigma, bool norm)
Definition: BCMath.cxx:33
void DrawFit(const char *options="", bool flaglegend=false)
double LogLikelihood(const std::vector< double > &parameters)
BCDataSet * fDataSet
Definition: BCModel.h:522
BCDataPoint * GetDataPoint(unsigned int index)
Definition: BCDataSet.cxx:98
virtual double CDF(const std::vector< double > &parameters, int index, bool lower=false)
void AddDataPoint(BCDataPoint *datapoint)
Definition: BCDataSet.cxx:358
A base class for all fitting classes.
Definition: BCFitter.h:27
void SetValue(unsigned index, double value)
Definition: BCDataPoint.cxx:54
int SetGraph(TGraphErrors *graph)