BAT  0.9.4
The Bayesian analysis toolkit
 All Classes Namespaces Functions Variables Enumerations
BCGoFTest.cxx
1 /*
2  * Copyright (C) 2007-2014, the BAT core developer team
3  * All rights reserved.
4  *
5  * For the licensing terms see doc/COPYING.
6  * For documentation see http://mpp.mpg.de/bat
7  */
8 
9 // ---------------------------------------------------------
10 
11 #include "BCGoFTest.h"
12 
13 #include "BCDataPoint.h"
14 #include "BCDataSet.h"
15 #include "BCLog.h"
16 #include "BCParameter.h"
17 
18 #include <TH1D.h>
19 #include <TString.h>
20 
21 // ---------------------------------------------------------
22 
23 BCGoFTest::BCGoFTest(const char* name) : BCModel(name)
24 {
25  // set original data set to zero
26  fTemporaryDataSet = 0;
27 
28  // set test mode to zero
29  fTestModel = 0;
30 
31  // reset pvalue and counter
32  fPValue = 0;
33  fPValueAbove = 0;
34  fPValueBelow = 0;
35 
36  // reset loglikelihood and range
37  fLogLikelihood = 0;
38  fLogLikelihoodMin = 1e99;
39  fLogLikelihoodMax = -1e99;
40 
41  // define new histogram
42  fHistogramLogProb = 0;
43 
44  // set defaults for the MCMC
45  MCMCSetNChains(5);
46  MCMCSetNIterationsMax(100000);
47  MCMCSetNIterationsRun(2000);
48 }
49 
50 // ---------------------------------------------------------
51 
53 {
54  // restore original data set
55 
56  // get number of data points and values
57  int ndatapoints = fTemporaryDataSet->GetNDataPoints();
58  int ndatavalues = fTemporaryDataSet->GetDataPoint(0)->GetNValues();
59 
60  for (int i = 0; i < ndatapoints; ++i)
61  for (int j = 0; j < ndatavalues; ++j)
62  fTestModel->GetDataSet()->GetDataPoint(i)->SetValue(j, fTemporaryDataSet->GetDataPoint(i)->GetValue(j));
63 
64  // restore data point limits
65  for (unsigned int i = 0; i < GetNParameters(); ++i)
66  fTestModel->SetDataBoundaries(
67  fMapDataValue[i],
68  GetParameter(i)->GetLowerLimit(),
69  GetParameter(i)->GetUpperLimit());
70 
71  // delete temporary data set
72  delete fTemporaryDataSet;
73 }
74 
75 // ---------------------------------------------------------
76 
77 double BCGoFTest::LogLikelihood(const std::vector<double> & parameters)
78 {
79  // set the original data set to the new parameters
80  for (int i = 0; i < int(parameters.size()); ++i)
81  fTestModel->GetDataSet()->GetDataPoint(fMapDataPoint[i])->SetValue(fMapDataValue[i], parameters.at(i));
82 
83  // calculate likelihood at the point of the original parameters
84  double loglikelihood = fTestModel->LogLikelihood(fDataSet->GetDataPoint(0)->GetValues());
85 
86  // return likelihood
87  return loglikelihood;
88 }
89 
90 // ---------------------------------------------------------
91 
92 void BCGoFTest::MCMCUserIterationInterface()
93 {
94  int nchains = MCMCGetNChains();
95 
96  for (int i = 0; i < nchains; ++i)
97  {
98  // get likelihood at the point of the original parameters
99  double loglikelihood = MCMCGetLogProbx(i);
100 
101  // calculate pvalue
102  if (loglikelihood < fLogLikelihood)
103  fPValueBelow++;
104  else
105  fPValueAbove++;
106 
107  // if histogram exists already, then fill it ...
108  if (fHistogramLogProb)
109  fHistogramLogProb->Fill(loglikelihood);
110  // ...otherwise find range
111  else
112  {
113  if (loglikelihood > fLogLikelihoodMax)
114  fLogLikelihoodMax = loglikelihood;
115  else if (loglikelihood < fLogLikelihoodMin)
116  fLogLikelihoodMin = loglikelihood;
117  }
118  }
119 }
120 
121 // ---------------------------------------------------------
122 
123 int BCGoFTest::SetTestPoint(std::vector<double> parameters)
124 {
125  // check if the boundaries of the original data set exist.
126  if (!fTestModel->GetFlagBoundaries())
127  {
128  BCLog::OutError("BCGoFTest::SetTestDataPoint : Boundaries of the original data set are not defined.");
129  return 0;
130  }
131 
132  // reset histogram
133  if (fHistogramLogProb)
134  {
135  delete fHistogramLogProb;
136  fHistogramLogProb = 0;
137  }
138 
139  // reset variables
140  fPValue = 0;
141  fPValueAbove = 0;
142  fPValueBelow = 0;
143 
144  // create temporary data set ...
145  fTemporaryDataSet = new BCDataSet();
146 
147  // ... and fill with the original one
148 
149  // get number of data points and values
150  int ndatapoints = fTestModel->GetDataSet()->GetNDataPoints();
151  int ndatavalues = fTestModel->GetDataSet()->GetDataPoint(0)->GetNValues();
152 
153  for (int i = 0; i < ndatapoints; ++i)
154  {
155  BCDataPoint * dp = new BCDataPoint(fTestModel->GetDataSet()->GetDataPoint(i)->GetValues());
156  fTemporaryDataSet->AddDataPoint(dp);
157  }
158 
159  // clear maps
160  fMapDataPoint.clear();
161  fMapDataValue.clear();
162 
163  int counter = 0;
164 
165  // remove parameters, but doesn't clear up memory
166  fParameters = BCParameterSet();
167 
168  // loop through data points and values
169  for (int i = 0; i < ndatapoints; ++i)
170  for (int j = 0; j < ndatavalues; ++j)
171  {
172  // debugKK
173  // needs to be fixed
174  // if (fTestModel->GetFixedDataAxis(j))
175  // continue;
176 
177  // add parameter to this model
178  std::string parName = Form("parameter_%i", counter);
179  AddParameter(
180  parName.c_str(),
181  fTestModel->GetDataPointLowerBoundary(j),
182  fTestModel->GetDataPointUpperBoundary(j));
183 
184  // add another element to the maps
185  fMapDataPoint.push_back(i);
186  fMapDataValue.push_back(j);
187 
188  // increase counter
189  counter ++;
190  }
191 
192  // check if there are any non-fixed data values left
193  if (counter == 0)
194  {
195  BCLog::OutError("BCGoFTest::SetTestDataPoint : No non-fixed data values left.");
196  return 0;
197  }
198 
199  // create a new data set containing the vector of parameters which
200  // are to be tested
201  BCDataPoint * datapoint = new BCDataPoint(parameters);
202  BCDataSet * dataset = new BCDataSet();
203  dataset->AddDataPoint(datapoint);
204 
205  // calculate likelihood of the original data set
206  fLogLikelihood = fTestModel->LogLikelihood(parameters);
207 
208  // if data set has been set before, delete
209  if (fDataSet)
210  delete fDataSet;
211 
212  // set data set of this model
213  fDataSet = dataset;
214 
215  // put proper range to new data set
216  for (int i = 0; i < int(parameters.size()); ++i)
217  SetDataBoundaries(
218  i,
219  fTestModel->GetParameter(i)->GetLowerLimit(),
220  fTestModel->GetParameter(i)->GetUpperLimit());
221 
222  return 1;
223 }
224 
225 // ---------------------------------------------------------
226 
227 double BCGoFTest::GetCalculatedPValue(bool flag_histogram)
228 {
229  // set histogram point to null
230  fHistogramLogProb = 0;
231 
232  if (flag_histogram)
233  {
234  // perform first run to obtain limits for the log(likelihood)
235  MarginalizeAll();
236 
237  // create histogram
238  double D = fLogLikelihoodMax - fLogLikelihoodMin;
239  fHistogramLogProb = new TH1D(Form("hist_%s_logprob", GetName().data()), ";ln(prob);N", 100, fLogLikelihoodMin - 0.1*D, fLogLikelihoodMax + 0.1*D);
240  fHistogramLogProb->SetStats(kFALSE);
241  }
242  else
243  {
244  }
245 
246  // run MCMC
247  MarginalizeAll();
248 
249  // check for convergence
250  if (MCMCGetNIterationsConvergenceGlobal() < 0.)
251  {
252  BCLog::OutDetail(" --> MCMC did not converge in evaluation of the p-value.");
253  return -1;
254  }
255 
256  // calculate p-value
257  fPValue = double(fPValueBelow) / double(fPValueBelow + fPValueAbove);
258 
259  // return p-value
260  return fPValue;
261 }
262 
263 // ---------------------------------------------------------
virtual double LogLikelihood(const std::vector< double > &params)=0
int SetTestPoint(std::vector< double > parameters)
Definition: BCGoFTest.cxx:123
double GetDataPointLowerBoundary(unsigned int index) const
Definition: BCModel.cxx:142
A class representing a data point.
Definition: BCDataPoint.h:31
unsigned int GetNValues() const
Definition: BCDataPoint.h:65
BCDataSet * GetDataSet() const
Definition: BCModel.h:100
bool GetFlagBoundaries() const
Definition: BCModel.cxx:154
The base class for all user-defined models.
Definition: BCModel.h:50
double GetDataPointUpperBoundary(unsigned int index) const
Definition: BCModel.cxx:148
const std::vector< double > & GetValues() const
Definition: BCDataPoint.h:60
A class representing a set of data points.
Definition: BCDataSet.h:37
virtual int AddParameter(BCParameter *parameter)
Definition: BCModel.cxx:225
BCGoFTest(const char *name)
Definition: BCGoFTest.cxx:23
double GetValue(unsigned index) const
Definition: BCDataPoint.cxx:34
double GetCalculatedPValue(bool flag_histogram=false)
Definition: BCGoFTest.cxx:227
BCDataSet * fDataSet
Definition: BCModel.h:522
BCDataPoint * GetDataPoint(unsigned int index)
Definition: BCDataSet.cxx:98
unsigned int GetNDataPoints()
Definition: BCDataSet.cxx:79
double fPValue
Definition: BCModel.h:536
double LogLikelihood(const std::vector< double > &parameters)
Definition: BCGoFTest.cxx:77
void AddDataPoint(BCDataPoint *datapoint)
Definition: BCDataSet.cxx:358
void SetValue(unsigned index, double value)
Definition: BCDataPoint.cxx:54
const std::string & GetName() const
Definition: BCModel.h:85