13 #include "BCDataPoint.h"
14 #include "BCDataSet.h"
15 #include "BCGoFTest.h"
20 #include "BCModelOutput.h"
21 #include "BCParameter.h"
42 fDataPointLowerBoundaries(0),
43 fDataPointUpperBoundaries(0),
48 fGoFNIterationsMax(100000),
49 fGoFNIterationsRun(2000),
51 fPriorConstantAll(false)
83 fDataFixedValues = bcmodel.fDataFixedValues;
86 fChi2NDoF = bcmodel.fChi2NDoF;
87 fPValueNDoF = bcmodel.fPValueNDoF;
106 for (
unsigned int i = 0; i < GetNParameters(); ++i)
137 BCLog::OutWarning(
"BCModel::GetDataPoint : No data set defined.");
194 void BCModel::SetDataBoundaries(
unsigned int index,
double lowerboundary,
double upperboundary,
bool fixed)
198 BCLog::OutError(
"BCModel::SetDataBoundaries : Need to define data set first.");
204 BCLog::OutError(
"BCModel::SetDataBoundaries : Index out of range.");
215 if (fDataFixedValues.size() == 0)
221 fDataFixedValues[index] = fixed;
258 if (GetIntegral() <= 0.) {
259 BCLog::OutError(
"BCModel::LogProbability. Normalization not available or zero.");
279 for (
unsigned i = 0; i < fParameters.Size(); ++i) {
297 logprob += log(f->Eval(parameters[i]));
300 logprob += log(h->Interpolate(parameters[i]));
302 logprob += log(h->GetBinContent(h->FindBin(parameters[i])));
305 BCLog::OutError(Form(
306 "BCModel::LogAPrioriProbability : Prior for parameter %s "
307 "is defined but not recognized.",
312 BCLog::OutError(Form(
313 "BCModel::LogAPrioriProbability: Prior for parameter %s "
314 "is undefined. Using constant prior to proceed.",
332 return exp(
LogEval(parameters));
344 double probability = 1;
345 for (
unsigned i = 0 ; i < GetNParameters() ; ++i)
346 probability *= 1. / fParameters[i]->GetRangeWidth();
356 double sum_sigma = 0;
357 for (
int i = 0; i < n; i++)
358 sum_sigma += log(
GetDataPoint(i)->GetValue(sigma_index));
360 double chi2 = -2. * (ll + (double) n / 2. * log(2. * M_PI) + sum_sigma);
362 fPValue = TMath::Prob(chi2, n);
370 std::vector<double> x;
375 double BCModel::GetPvalueFromChi2NDoF(std::vector<double> par,
int sigma_index)
379 int npar = GetNParameters();
381 double sum_sigma = 0;
382 for (
int i = 0; i < n; i++)
383 sum_sigma += log(
GetDataPoint(i)->GetValue(sigma_index));
385 double chi2 = -2. * (ll + (double) n / 2. * log(2. * M_PI) + sum_sigma);
387 fChi2NDoF = chi2 / double(n - npar);
388 fPValueNDoF = TMath::Prob(chi2, n - npar);
397 BCLog::OutError(Form(
"BCModel::GetPvalueFromKolmogorov : "
398 "test defined only for continuous distributions."));
409 std::set<double> uniqueObservations;
410 for (
int i = 0; i < N; i++)
411 uniqueObservations.insert(
CDF(par, i,
false));
413 int nUnique = uniqueObservations.size();
414 if (nUnique != ECDF->GetNbinsX() + 1) {
415 BCLog::OutError(Form(
"BCModel::GetPvalueFromKolmogorov : "
416 "Number of unique data points doesn't match (%d vs %d)", nUnique,
417 ECDF->GetNbinsX() + 1));
427 std::set<double>::const_iterator iter = uniqueObservations.begin();
428 for (
int iBin = 0; iBin < nUnique; ++iBin) {
430 dist = TMath::Abs(*iter - ECDF->GetBinContent(iBin + 1));
432 distMax = TMath::Max(dist, distMax);
440 double z = distMax * sqrt(N);
442 fPValue = TMath::KolmogorovProb(z);
451 BCH1D * BCModel::CalculatePValue(std::vector<double> par,
bool flag_histogram)
456 BCLog::OutSummary(
"Do goodness-of-fit-test");
471 goftest->MCMCSetFlagFillHistograms(
false);
482 if (flag_histogram) {
504 if (point.size() != GetNParameters()) {
505 BCLog::OutError(
"BCModel::HessianMatrixElement : Invalid number of entries in the vector.");
515 std::vector<double> xpp = point;
516 std::vector<double> xpm = point;
517 std::vector<double> xmp = point;
518 std::vector<double> xmm = point;
520 unsigned idx1 = fParameters.Index(par1->
GetName());
521 unsigned idx2 = fParameters.Index(par2->
GetName());
542 return (ppp + pmm - ppm - pmp) / (4. * dx1 * dx2);
561 if (index < 0 && index >=
int(GetNParameters())) {
562 BCLog::OutError(
"BCModel::SetPrior : Index out of range.");
583 for (
unsigned int i = 0; i < GetNParameters(); i++)
584 if (name == GetParameter(i)->GetName())
595 GetParameter(index)->Fix(value);
604 fParameters.Get(name)->Fix(value);
613 if (index < 0 || index >=
int(GetNParameters())) {
614 BCLog::OutError(
"BCModel::SetPriorGauss : Index out of range.");
619 TF1 * f =
new TF1(Form(
"prior_%s", GetParameter(index)->
GetName().c_str()),
620 "1./sqrt(2.*TMath::Pi())/[1] * exp(- (x-[0])*(x-[0])/2./[1]/[1])",
621 GetParameter(index)->GetLowerLimit(),
622 GetParameter(index)->GetUpperLimit());
623 f->SetParameter(0, mean);
624 f->SetParameter(1, sigma);
635 for (
unsigned int i = 0; i < GetNParameters(); i++)
636 if (name == GetParameter(i)->GetName())
647 if (index < 0 || index >=
int(GetNParameters())) {
648 BCLog::OutError(
"BCModel::SetPriorGauss : Index out of range.");
653 TF1 * f =
new TF1(Form(
"prior_%s", GetParameter(index)->
GetName().c_str()),
655 GetParameter(index)->GetLowerLimit(),
656 GetParameter(index)->GetUpperLimit(),
658 f->SetParameter(0, mean);
659 f->SetParameter(1, sigmadown);
660 f->SetParameter(2, sigmaup);
673 for (
unsigned int i = 0; i < GetNParameters(); i++)
674 if (name == GetParameter(i)->GetName())
685 if (index < 0 && index >=
int(GetNParameters())) {
686 BCLog::OutError(
"BCModel::SetPrior : Index out of range.");
694 if (h->GetDimension() != 1) {
695 BCLog::OutError(Form(
"BCModel::SetPrior : Histogram given for parameter %d is not 1D.",index));
700 h->Scale(1./h->Integral(
"width"));
723 for (
unsigned int i = 0; i < GetNParameters(); i++)
724 if (name == GetParameter(i)->GetName())
728 return SetPrior(index, h, interpolate);
735 if (index < 0 && index >=
int(GetNParameters())) {
736 BCLog::OutError(
"BCModel::SetPriorConstant : Index out of range.");
755 if ( !fParameters.Size())
756 BCLog::OutWarning(
"BCModel::SetPriorConstantAll : No parameters defined.");
759 for (
unsigned i = 0; i < fParameters.Size(); ++i) {
775 BCLog::OutSummary(Form(
"Model : %s",
fName.data()));
776 BCLog::OutSummary(Form(
"Number of parameters : %u", GetNParameters()));
777 BCLog::OutSummary(
"Parameters:");
780 for (
unsigned i = 0; i < GetNParameters(); i++)
784 if ( !GetBestFitParameters().empty()) {
785 BCLog::OutSummary(Form(
"Log of the maximum posterior: %f", GetLogMaximum()));
786 BCLog::OutSummary(
"Best fit parameters:");
788 for (
unsigned i = 0; i < GetNParameters(); i++) {
789 if ( fParameters[i]->Fixed() )
790 BCLog::OutSummary(Form(
" %s = %f (fixed)", fParameters[i]->
GetName().data(), GetBestFitParameter(i)));
792 BCLog::OutSummary(Form(
" %s = %f (global)", fParameters[i]->
GetName().data(), GetBestFitParameter(i)));
794 if ( fMarginalModes.size() == GetNParameters())
795 BCLog::OutSummary(Form(
" %s = %f (marginalized)", fParameters[i]->
GetName().data(), GetBestFitParametersMarginalized()[i]));
802 double likelihood =
Likelihood(GetBestFitParameters());
803 BCLog::OutSummary(
" Model testing:");
804 BCLog::OutSummary(Form(
" p(data|lambda*) = %f", likelihood));
805 BCLog::OutSummary(Form(
" p-value = %f",
fPValue));
809 if (GetIntegral() > 0) {
810 BCLog::OutSummary(
" Evidence:");
811 BCLog::OutSummary(Form(
" - evidence : %f", GetIntegral()));
821 std::ofstream ofi(file);
824 if (!ofi.is_open()) {
825 std::cerr <<
"Couldn't open file " << file << std::endl;
830 unsigned npar = GetNParameters();
831 unsigned nchains = MCMCGetNChains();
834 bool flag_conv = MCMCGetNIterationsConvergenceGlobal() > 0;
837 <<
" -----------------------------------------------------" << std::endl
838 <<
" Summary" << std::endl
839 <<
" -----------------------------------------------------" << std::endl
842 ofi <<
" Model summary" << std::endl <<
" =============" << std::endl
843 <<
" Model: " <<
fName.data() << std::endl
844 <<
" Number of parameters: " << npar << std::endl
845 <<
" List of Parameters and ranges:" << std::endl;
846 for (
unsigned i = 0; i < npar; ++i) {
847 ofi <<
" (" << i <<
") Parameter \""
848 << fParameters[i]->GetName() <<
"\"" <<
": "
849 <<
"[" << fParameters[i]->GetLowerLimit() <<
", "
850 << fParameters[i]->GetUpperLimit() <<
"]";
851 if (fParameters[i]->Fixed()) {
858 ofi <<
" Results of the optimization" << std::endl
859 <<
" ===========================" << std::endl
860 <<
" Optimization algorithm used: "
861 << DumpUsedOptimizationMethod()<< std::endl;
863 if ( ! GetBestFitParameters().empty()) {
864 ofi <<
" Log of the maximum posterior: " << GetLogMaximum() << std::endl;
865 ofi <<
" List of parameters and global mode:" << std::endl;
866 for (
unsigned i = 0; i < npar; ++i) {
867 ofi <<
" (" << i <<
") Parameter \""
868 << fParameters[i]->GetName() <<
"\": "
869 << GetBestFitParameter(i);
870 if (fParameters[i]->Fixed()) {
873 else if (GetBestFitParameterErrors().size() == npar) {
874 if(GetBestFitParameterError(i) >= 0.)
875 ofi <<
" +- " << GetBestFitParameterError(i);
877 ofi <<
" (no error estimate available) ";
880 ofi <<
" (no error estimate available) ";
887 ofi <<
" No best fit information available." << std::endl;
892 ofi <<
" Results of the model test" << std::endl
893 <<
" =========================" << std::endl
894 <<
" p-value: " <<
fPValue << std::endl;
895 if (fPValueNDoF >= 0)
896 ofi <<
" p-value corrected for degrees of freedom: " << fPValueNDoF << std::endl;
901 if (GetIntegral() >= 0.) {
902 ofi <<
" Results of the integration" << std::endl
903 <<
" ============================" << std::endl
904 <<
" Integration method used: "
905 << DumpUsedIntegrationMethod() << std::endl;
906 ofi <<
" Evidence: " << GetIntegral();
908 ofi <<
" +- " << GetError() << std::endl;
910 ofi <<
" (no error estimate available) " << std::endl;
915 if (!flag_conv && fMCMCFlagRun)
916 ofi <<
" WARNING: the Markov Chain did not converge!" << std::endl
917 <<
" Be cautious using the following results!" << std::endl
921 if (fFlagMarginalized) {
922 ofi <<
" Results of the marginalization" << std::endl
923 <<
" ==============================" << std::endl
924 <<
" Marginalization algorithm used: "
925 << DumpUsedMarginalizationMethod() << std::endl
926 <<
" List of parameters and properties of the marginalized"
927 << std::endl <<
" distributions:" << std::endl;
928 for (
unsigned i = 0; i < npar; ++i) {
929 if ( ! fParameters[i]->FillHistograms())
933 BCH1D * bch1d = GetMarginalized(fParameters[i]);
935 ofi <<
" (" << i <<
") Parameter \""
936 << fParameters[i]->GetName() <<
"\":";
939 ofi <<
" fixed (or histogram does not exist) " << std::endl;
945 ofi <<
" Mean +- sqrt(V): " << std::setprecision(4)
946 << bch1d->
GetMean() <<
" +- " << std::setprecision(4)
947 << bch1d->
GetRMS() << std::endl
949 <<
" Median +- central 68% interval: "
950 << std::setprecision(4) << bch1d->
GetMedian() <<
" + "
952 <<
" - " << std::setprecision(4)
955 <<
" (Marginalized) mode: " << bch1d->
GetMode() << std::endl;
957 ofi <<
" 5% quantile: " << std::setprecision(4)
959 <<
" 10% quantile: " << std::setprecision(4)
961 <<
" 16% quantile: " << std::setprecision(4)
963 <<
" 84% quantile: " << std::setprecision(4)
965 <<
" 90% quantile: " << std::setprecision(4)
967 <<
" 95% quantile: " << std::setprecision(4)
970 std::vector<double> v;
972 ofi <<
" Smallest interval(s) containing at least 68% and local mode(s):"
974 for (
unsigned j = 0; j < v.size(); j += 5)
975 ofi <<
" (" << v[j] <<
", " << v[j + 1]
976 <<
") (local mode at " << v[j + 3] <<
" with rel. height "
977 << v[j + 2] <<
"; rel. area " << v[j + 4] <<
")"
983 ofi <<
" Status of the MCMC" << std::endl <<
" ==================" << std::endl
984 <<
" Convergence reached: " << (flag_conv ?
"yes" :
"no")
988 ofi <<
" Number of iterations until convergence: "
989 << MCMCGetNIterationsConvergenceGlobal() << std::endl;
991 ofi <<
" WARNING: the Markov Chain did not converge! Be\n"
992 <<
" cautious using the following results!" << std::endl
994 ofi <<
" Number of chains: " << MCMCGetNChains() << std::endl
995 <<
" Number of iterations per chain: " << MCMCGetNIterationsRun() << std::endl
996 <<
" Average pre-run efficiencies:" << std::endl;
998 std::vector<double> efficiencies;
999 efficiencies.assign(npar, 0.);
1001 for (
unsigned ipar = 0; ipar < npar; ++ipar)
1002 for (
unsigned ichain = 0; ichain < nchains; ++ichain) {
1003 efficiencies[ipar] +=
1004 fMCMCEfficiencies[ichain*npar+ipar] / double(nchains) * 100.;
1007 for (
unsigned ipar = 0; ipar < npar; ++ipar)
1008 ofi <<
" (" << ipar <<
") Parameter \""
1009 << fParameters[ipar]->
GetName().data() <<
"\": "
1010 << efficiencies.at(ipar) <<
"%" << std::endl;
1014 ofi <<
" -----------------------------------------------------" << std::endl
1015 <<
" Notation:" << std::endl
1016 <<
" Mean : mean value of the marg. pdf" << std::endl
1017 <<
" Median : median of the marg. pdf" << std::endl
1018 <<
" Marg. mode : most probable value of the marg. pdf" << std::endl
1019 <<
" V : Variance of the marg. pdf" << std::endl
1020 <<
" Quantiles : most commonly used quantiles" <<std::endl
1021 <<
" -----------------------------------------------------" << std::endl
1031 BCLog::OutSummary(
"---------------------------------------------------");
1032 BCLog::OutSummary(Form(
"Fit summary for model \'%s\':",
GetName().data()));
1033 BCLog::OutSummary(Form(
" Number of parameters: Npar = %i", GetNParameters()));
1035 BCLog::OutSummary(Form(
" Number of data points: Ndata = %i",
GetNDataPoints()));
1036 BCLog::OutSummary(
" Number of degrees of freedom:");
1037 BCLog::OutSummary(Form(
" NDoF = Ndata - Npar = %i",
GetNDataPoints() - GetNParameters()));
1039 if (!GetBestFitParameters().empty())
1040 BCLog::OutSummary(
" Best fit parameters (global):");
1041 for (
unsigned int i = 0; i < GetNParameters(); ++i)
1042 BCLog::OutSummary(Form(
" %s = %.3g", GetParameter(i)->
GetName().data(), GetBestFitParameter(i)));
1045 BCLog::OutSummary(
" Goodness-of-fit test:");
1046 BCLog::OutSummary(Form(
" p-value = %.3g",
GetPValue()));
1048 BCLog::OutSummary(Form(
" p-value corrected for NDoF = %.3g", GetPValueNDoF()));
1049 BCLog::OutSummary(Form(
" chi2 / NDoF = %.3g", GetChi2NDoF()));
1052 BCLog::OutSummary(
"---------------------------------------------------");
1059 if (parameters.size() != GetNParameters()) {
1060 BCLog::OutError(
"BCModel::PrintHessianMatrix : Invalid number of entries in the vector");
1065 BCLog::OutSummary(
"Hessian matrix elements: ");
1066 BCLog::OutSummary(
"Parameter values:");
1068 for (
int i = 0; i < int(parameters.size()); i++)
1069 BCLog::OutSummary(Form(
"Parameter %d : %f", i, parameters.at(i)));
1071 BCLog::OutSummary(
"Hessian matrix:");
1073 for (
unsigned int i = 0; i < GetNParameters(); i++)
1074 for (
unsigned int j = 0; j < i; j++) {
1077 fParameters[i], fParameters[j], parameters);
1080 BCLog::OutSummary(Form(
"%d %d : %f", i, j, hessianmatrixelement));
1085 BCDataPoint * BCModel::VectorToDataPoint(
const std::vector<double> &data)
1093 int BCModel::CompareStrings(
const char * string1,
const char * string2)
1097 if (strlen(string1) != strlen(string2))
1100 for (
int i = 0; i < int(strlen(string1)); i++)
1101 if (string1[i] != string2[i])
void SetDataPointUpperBoundary(int index, double upperboundary)
int SetPriorGauss(int index, double mean, double sigma)
std::vector< bool > fPriorContainerConstant
void PrintShortFitSummary(int chi2flag=0)
double APrioriProbability(const std::vector< double > ¶meters)
double HessianMatrixElement(const BCParameter *parameter1, const BCParameter *parameter2, std::vector< double > point)
virtual double Likelihood(const std::vector< double > ¶ms)
virtual double LogLikelihood(const std::vector< double > ¶ms)=0
std::vector< bool > fPriorContainerInterpolate
int SetTestPoint(std::vector< double > parameters)
double GetDataPointLowerBoundary(unsigned int index) const
double Probability(const std::vector< double > ¶meter)
TH1D * GetHistogramLogProb()
A class representing a data point.
int SetPriorDelta(int index, double value)
void SetTestModel(BCModel *testmodel)
unsigned int GetNValues() const
double SplitGaussian(double *x, double *par)
A class for handling numerical operations for models.
void SetDataSet(BCDataSet *dataset)
bool GetFlagBoundaries() const
int SetPriorConstantAll()
void PrintResults(const char *file)
double GetRangeWidth() const
The base class for all user-defined models.
double GetDataPointUpperBoundary(unsigned int index) const
void SetValues(const std::vector< double > &values)
void SetSingleDataPoint(BCDataPoint *datapoint)
void PrintHessianMatrix(std::vector< double > parameters)
A class representing a set of data points.
BCDataPoint * fDataPointLowerBoundaries
std::vector< TNamed * > fPriorContainer
double GetPvalueFromChi2(const std::vector< double > &par, int sigma_index)
double LogProbabilityNN(const std::vector< double > ¶meters)
BCModel(const char *name="model")
virtual double CDF(const std::vector< double > &, int, bool)
BCDataPoint * GetDataPoint(unsigned int index) const
std::vector< double > GetDataComponents(int index)
double LogProbability(const std::vector< double > ¶meter)
void SetHistogram(TH1D *hist)
A class representing a parameter of a model.
virtual int AddParameter(BCParameter *parameter)
unsigned GetNDataPoints() const
double Eval(const std::vector< double > ¶meters)
virtual int AddParameter(const char *name, double min, double max, const char *latexname="")
double ProbabilityNN(const std::vector< double > ¶ms)
The class for testing model hypotheses.
double GetPvalueFromKolmogorov(const std::vector< double > &par, int index)
double GetCalculatedPValue(bool flag_histogram=false)
A class for handling 1D distributions.
std::vector< double > GetSmallestIntervals(double content=0.68)
int SetPriorConstant(int index)
BCDataPoint * GetDataPoint(unsigned int index)
virtual double LogEval(const std::vector< double > ¶meters)
double GetQuantile(double probablity)
unsigned int GetNDataPoints()
virtual double SamplingFunction(const std::vector< double > ¶meters)
const std::string & GetName() const
int SetPrior(int index, TF1 *f)
BCDataPoint * fDataPointUpperBoundaries
void Copy(const BCModel &bcmodel)
void SetDataPointLowerBoundary(int index, double lowerboundary)
virtual double LogAPrioriProbability(const std::vector< double > ¶meters)
std::vector< double > GetChi2Runs(int dataIndex, int sigmaIndex)
void AddDataPoint(BCDataPoint *datapoint)
BCModel & operator=(const BCModel &bcmodel)
void SetValue(unsigned index, double value)
virtual void CorrelateDataPointValues(std::vector< double > &x)
const std::string & GetName() const
TH1D * ECDF(const std::vector< double > &data)