Bayesian Analysis Toolkit: BCModel.h Source File

Go to the documentation of this file.
00001 #ifndef __BCMODEL__H
00002 #define __BCMODEL__H
00003 
00004 /*!
00005  * \class BCModel
00006  * \brief The base class for all user-defined models.
00007  * \author Daniel Kollar
00008  * \author Kevin Kr&ouml;ninger
00009  * \version 1.0
00010  * \date 08.2008
00011  * \detail This class represents a model. It contains a container of
00012  * parameters, their prior distributions and the conditional
00013  * probabilities given those parameters.  The methods which implement
00014  * the prior and conditional probabilities have to be overloaded by
00015  * the user in the user defined model class which will inherit from
00016  * this class.
00017  */
00018 
00019 /*
00020  * Copyright (C) 2008-2010, Daniel Kollar and Kevin Kroeninger.
00021  * All rights reserved.
00022  *
00023  * For the licensing terms see doc/COPYING.
00024  */
00025 
00026 // ---------------------------------------------------------
00027 
00028 #include <vector>
00029 #include <string>
00030 
00031 #include <BAT/BCIntegrate.h>
00032 
00033 // ROOT classes
00034 class TNamed;
00035 class TH1;
00036 class TH2D;
00037 class TGraph;
00038 class TCanvas;
00039 class TPostscript;
00040 class TF1;
00041 
00042 //BAT classes
00043 class BCDataPoint;
00044 class BCDataSet;
00045 class BCParameter;
00046 class BCH1D;
00047 class BCH2D;
00048 
00049 const int MAXNDATAPOINTVALUES = 20;
00050 
00051 // ---------------------------------------------------------
00052 
00053 class BCModel : public BCIntegrate
00054 {
00055 
00056    public:
00057 
00058       /** \name Constructors and destructors */
00059       /* @{ */
00060 
00061       /**
00062        * The default constructor. */
00063       BCModel();
00064 
00065       /**
00066        * A constructor.
00067        * @param name The name of the model */
00068       BCModel(const char * name);
00069 
00070       /**
00071        * The default destructor. */
00072       virtual ~BCModel();
00073 
00074       /* @} */
00075 
00076       /** \name Member functions (get) */
00077       /* @{ */
00078 
00079       /**
00080        * @return The name of the model. */
00081       std::string GetName()
00082          { return fName; };
00083 
00084       /**
00085        * @return The index of the model. */
00086       int GetIndex()
00087          { return fIndex; };
00088 
00089       /**
00090        * @return The a priori probability. */
00091       double GetModelAPrioriProbability()
00092          { return fModelAPriori; };
00093 
00094       /**
00095        * @return The a posteriori probability. */
00096       double GetModelAPosterioriProbability()
00097          { return fModelAPosteriori; };
00098 
00099       /**
00100        * @return The normalization factor of the probability */
00101       double GetNormalization()
00102          { return fNormalization; };
00103 
00104       /**
00105        * @return The data set. */
00106       BCDataSet* GetDataSet()
00107          { return fDataSet; };
00108 
00109       /**
00110        * @return The lower boundaries of possible data values. */
00111       BCDataPoint* GetDataPointLowerBoundaries()
00112          { return fDataPointLowerBoundaries; };
00113 
00114       /**
00115        * @return The upper boundaries of possible data values. */
00116       BCDataPoint* GetDataPointUpperBoundaries()
00117          { return fDataPointUpperBoundaries; };
00118 
00119       /**
00120        * @param index The index of the variable.
00121        * @return The lower boundary of possible data values for a particular variable. */
00122       double GetDataPointLowerBoundary(unsigned int index)
00123          { return fDataPointLowerBoundaries -> GetValue(index); };
00124 
00125       /**
00126        * @param index The index of the variable.
00127        * @return The upper boundary of possible data values for a particular variable. */
00128       double GetDataPointUpperBoundary(unsigned int index)
00129          { return fDataPointUpperBoundaries -> GetValue(index); };
00130 
00131       /*
00132        * Checks if the boundaries have been defined
00133        * @return true, if the boundaries have been set, false otherwise */
00134       bool GetFlagBoundaries();
00135 
00136       /**
00137        * @return The number of data points in the current data set. */
00138       int GetNDataPoints();
00139 
00140       /**
00141        * @param index The index of the data point.
00142        * @return The data point in the current data set at index */
00143       BCDataPoint * GetDataPoint(unsigned int index);
00144 
00145       /**
00146        * @return The minimum number of data points. */
00147       unsigned int GetNDataPointsMinimum()
00148          { return fNDataPointsMinimum; };
00149 
00150       /**
00151        * @return The maximum number of data points. */
00152       unsigned int GetNDataPointsMaximum()
00153          { return fNDataPointsMaximum; };
00154 
00155       /**
00156        * @return The number of parameters of the model. */
00157       unsigned int GetNParameters()
00158          { return fParameterSet ? fParameterSet -> size() : 0; };
00159 
00160       /**
00161        * @param index The index of the parameter in the parameter set.
00162        * @return The parameter. */
00163       BCParameter * GetParameter(int index);
00164 
00165       /**
00166        * @param name The name of the parameter in the parameter set.
00167        * @return The parameter. */
00168       BCParameter * GetParameter(const char * name);
00169 
00170       /**
00171        * @return parameter set */
00172       BCParameterSet * GetParameterSet()
00173          { return fParameterSet; };
00174 
00175       /**
00176        * Returns the value of a parameter (defined by index) at
00177        * the global mode of the posterior pdf.
00178        * @param index index of the parameter.
00179        * @return best fit value of the parameter or -1e+111 on error or center of the range if mode finding not yer run */
00180       double GetBestFitParameter(unsigned int index);
00181 
00182       /**
00183        * Returns the error on the value of a parameter (defined by index) at
00184        * the global mode of the posterior pdf.
00185        * @param index index of the parameter.
00186        * @return error on the best fit value of the parameter or -1 if undefined */
00187       double GetBestFitParameterError(unsigned int index);
00188 
00189       /**
00190        * Returns the set of values of the parameters at the global mode of
00191        * the posterior pdf.
00192        * @return The best fit parameters */
00193       std::vector <double> GetBestFitParameters()
00194          { return fBestFitParameters; };
00195       
00196       std::vector <double> GetBestFitParameterErrors() 
00197          { return fBestFitParameterErrors; }; 
00198 
00199       /**
00200        * Returns the value of a particular parameter (defined by index) at
00201        * the modes of the marginalized posterior pdfs.
00202        * @param index index of the parameter.
00203        * @return best fit parameter or -1e+111 on error or center of the range if marginalization not yet run */
00204       double GetBestFitParameterMarginalized(unsigned int index);
00205 
00206       /**
00207        * Returns the set of values of the parameters at the modes of the
00208        * marginalized posterior pdfs.
00209        * @return best fit parameters */
00210       std::vector <double> GetBestFitParametersMarginalized()
00211          { return fBestFitParametersMarginalized; };                                                          
00212 
00213       /**
00214        * @return The 2-d histogram of the error band. */
00215       TH2D * GetErrorBandXY()
00216          { return fErrorBandXY; };
00217 
00218       TH2D * GetErrorBandXY_yellow(double level=.68, int nsmooth=0);
00219 
00220       /**
00221        * Returns a vector of y-values at a certain probability level.
00222        * @param level The level of probability
00223        * @return vector of y-values */
00224       std::vector <double> GetErrorBand(double level);
00225 
00226       TGraph * GetErrorBandGraph(double level1, double level2);
00227 
00228       TGraph * GetFitFunctionGraph(std::vector <double> parameters);
00229 
00230       TGraph * GetFitFunctionGraph()
00231          { return this -> GetFitFunctionGraph(this -> GetBestFitParameters()); };
00232 
00233       TGraph * GetFitFunctionGraph(std::vector <double> parameters, double xmin, double xmax, int n=1000);
00234 
00235       bool GetFixedDataAxis(unsigned int index);
00236 
00237       /* @} */
00238 
00239       /** \name Member functions (set) */
00240       /* @{ */
00241 
00242       /**
00243        * Sets the name of the model.
00244        * @param name Name of the model */
00245       void SetName(const char * name)
00246          { fName = name; };
00247 
00248       /**
00249        * Sets the index of the model within the BCModelManager.
00250        * @param index The index of the model */
00251       void SetIndex(int index)
00252          { fIndex = index; };
00253 
00254       /**
00255        * Set all parameters of the model using a BCParameterSet container.
00256        * @par pointer to parameter set */
00257       void SetParameterSet( BCParameterSet * parset )
00258          { fParameterSet = parset; };
00259 
00260       /** 
00261        * Set the range of a parameter
00262        * @param index The parameter index
00263        * @param parmin The parameter minimum
00264        * @param parmax The parameter maximum
00265        * @return An error code. */
00266       int SetParameterRange(int index, double parmin, double parmax);
00267 
00268       /**
00269        * Sets the a priori probability for a model.
00270        * @param model The model
00271        * @param probability The a priori probability */
00272       void SetModelAPrioriProbability(double probability)
00273          { fModelAPriori = probability; };
00274 
00275       /**
00276        * Sets the a posteriori probability for a model.
00277        * @param model The model
00278        * @param probability The a posteriori probability */
00279       void SetModelAPosterioriProbability(double probability)
00280          { fModelAPosteriori = probability; };
00281 
00282       /**
00283        * Sets the normalization of the likelihood.
00284        * The normalization is the integral of likelihood over all parameters.
00285        * @param norm The normalization of the likelihood */
00286       void SetNormalization(double norm)
00287          { fNormalization = norm; };
00288 
00289       /**
00290        * Sets the data set.
00291        * @param dataset A data set */
00292       void SetDataSet(BCDataSet* dataset)
00293          { fDataSet = dataset; fNormalization = -1.0; };
00294 
00295       /**
00296        * Sets a single data point as data set.
00297        * @param datapoint A data point */
00298       void SetSingleDataPoint(BCDataPoint * datapoint);
00299 
00300       void SetSingleDataPoint(BCDataSet * dataset, unsigned int index);
00301 
00302       /**
00303        * Sets the minimum number of data points. */
00304       void SetNDataPointsMinimum(unsigned int minimum)
00305          { fNDataPointsMinimum = minimum; };
00306 
00307       /**
00308        * Sets the maximum number of data points. */
00309       void SetNDataPointsMaximum(unsigned int maximum)
00310          { fNDataPointsMaximum = maximum; };
00311 
00312       void SetDataBoundaries(unsigned int index, double lowerboundary, double upperboundary, bool fixed=false);
00313 
00314       /**
00315        * Sets the error band flag to continuous function */
00316       void SetErrorBandContinuous(bool flag);
00317 
00318       /**
00319        * Set the number of bins for the marginalized distribution of a parameter.
00320        * @param parname The name of the parameter in the parameter set
00321        * @param nbins   Number of bins (default = 100) */
00322       void SetNbins(const char * parname, int nbins);
00323 
00324       /**
00325        * Set prior for a parameter. 
00326        * @param index The parameter index
00327        * @param f A pointer to a function describing the prior
00328        * @return An error code.
00329        */ 
00330       int SetPrior(int index, TF1* f); 
00331 
00332       /**
00333        * Set prior for a parameter. 
00334        * @param name The parameter name
00335        * @param f A pointer to a function describing the prior
00336        * @return An error code.
00337        */ 
00338       int SetPrior(const char* name, TF1* f); 
00339 
00340       /**
00341        * Set Gaussian prior for a parameter. 
00342        * @param index The parameter index
00343        * @param mean The mean of the Gaussian
00344        * @param sigma The sigma of the Gaussian
00345        * @return An error code.
00346        */ 
00347       int SetPriorGauss(int index, double mean, double sigma); 
00348 
00349       /**
00350        * Set Gaussian prior for a parameter. 
00351        * @param name The parameter name
00352        * @param mean The mean of the Gaussian
00353        * @param sigma The sigma of the Gaussian
00354        * @return An error code.
00355        */ 
00356       int SetPriorGauss(const char* name, double mean, double sigma); 
00357 
00358       /**
00359        * Set Gaussian prior for a parameter with two different widths.
00360        * @param index The parameter index
00361        * @param mean The mean of the Gaussian
00362        * @param sigmadown The sigma (down) of the Gaussian
00363        * @param sigmaup The sigma (up)of the Gaussian
00364        * @return An error code.
00365        */ 
00366       int SetPriorGauss(int index, double mean, double sigmadown, double sigmaup);
00367 
00368       /**
00369        * Set Gaussian prior for a parameter with two different widths.
00370        * @param name The parameter name
00371        * @param mean The mean of the Gaussian
00372        * @param sigmadown The sigma (down) of the Gaussian
00373        * @param sigmaup The sigma (up)of the Gaussian
00374        * @return An error code.
00375        */ 
00376       int SetPriorGauss(const char* name, double mean, double sigmadown, double sigmaup);
00377 
00378       /**
00379        * Set prior for a parameter. 
00380        * @param index parameter index
00381        * @param h pointer to a histogram describing the prior
00382        * @param flag whether or not to use linear interpolation
00383        * @return An error code.
00384        */ 
00385       int SetPrior(int index, TH1 * h, bool flag=false); 
00386 
00387       /**
00388        * Set prior for a parameter. 
00389        * @param name parameter name
00390        * @param h pointer to a histogram describing the prior
00391        * @param flag whether or not to use linear interpolation
00392        * @return An error code.
00393        */ 
00394       int SetPrior(const char* name, TH1 * h, bool flag=false); 
00395 
00396       /**
00397        * Set constant prior for this parameter
00398        * @param index the index of the parameter
00399        * @return An error code
00400        */
00401       int SetPriorConstant(int index);
00402 
00403       /**
00404        * Set constant prior for this parameter
00405        * @param name the name of the parameter
00406        * @return An error code
00407        */
00408       int SetPriorConstant(const char* name);
00409 
00410       /**
00411        * Enable caching the constant value of the prior, so LogAPrioriProbability
00412        * is called only once. Note that the prior for ALL parameters is
00413        * assumed to be constant. The value is computed from
00414        * the parameter ranges, so make sure these are defined before this method is
00415        * called.
00416        * @return An error code
00417        */
00418       int SetPriorConstantAll();
00419 
00420       /* @} */
00421 
00422       /** \name Member functions (miscellaneous methods) */
00423       /* @{ */
00424 
00425       /**
00426        * Adds a parameter to the parameter set
00427        * @param name The name of the parameter
00428        * @param lowerlimit The lower limit of the parameter values
00429        * @param upperlimit The upper limit of the parameter values
00430        * @see AddParameter(BCParameter* parameter); */
00431       int AddParameter(const char * name, double lowerlimit, double upperlimit);
00432 
00433       /**
00434        * Adds a parameter to the model.
00435        * @param parameter A model parameter
00436        * @see AddParameter(const char * name, double lowerlimit, double upperlimit); */
00437       int AddParameter(BCParameter* parameter);
00438 
00439       /**
00440        * Returns the prior probability.
00441        * @param parameters A set of parameter values
00442        * @return The prior probability p(parameters)
00443        * @see GetPrior(std::vector <double> parameters) */
00444       double APrioriProbability(std::vector <double> parameters)
00445          { return exp( this->LogAPrioriProbability(parameters) ); };
00446 
00447       /**
00448        * Returns natural logarithm of the prior probability.
00449        * Method needs to be overloaded by the user.
00450        * @param parameters A set of parameter values
00451        * @return The prior probability p(parameters)
00452        * @see GetPrior(std::vector <double> parameters) */
00453       virtual double LogAPrioriProbability(std::vector <double> parameters); 
00454 
00455       /**
00456        * Returns the likelihood
00457        * @param parameters A set of parameter values
00458        * @return The likelihood */
00459       double Likelihood(std::vector <double> parameter)
00460          { return exp( this->LogLikelihood(parameter) ); };
00461 
00462       /**
00463        * Calculates natural logarithm of the likelihood.
00464        * Method needs to be overloaded by the user.
00465        * @param parameters A set of parameter values
00466        * @return Natural logarithm of the likelihood */
00467       virtual double LogLikelihood(std::vector <double> parameter);
00468 
00469       /**
00470        * Returns the likelihood times prior probability given a set of parameter values
00471        * @param parameters A set of parameter values
00472        * @return The likelihood times prior probability */
00473       double ProbabilityNN(std::vector <double> parameter)
00474          { return exp( this->LogProbabilityNN(parameter) ); };
00475 
00476       /**
00477        * Returns the natural logarithm of likelihood times prior probability given
00478        * a set of parameter values
00479        * @param parameters A set of parameter values
00480        * @return The likelihood times prior probability */
00481       double LogProbabilityNN(std::vector <double> parameter);
00482 
00483       /**
00484        * Returns the a posteriori probability given a set of parameter values
00485        * @param parameters A set of parameter values
00486        * @return The a posteriori probability */
00487       double Probability(std::vector <double> parameter)
00488          { return exp( this->LogProbability(parameter) ); };
00489 
00490       /**
00491        * Returns natural logarithm of the  a posteriori probability given a set of parameter values
00492        * @param parameters A set of parameter values
00493        * @return The a posteriori probability */
00494       double LogProbability(std::vector <double> parameter);
00495 
00496       /**
00497        * Returns a conditional probability.
00498        * Method needs to be overloaded by the user.
00499        * @param datapoint A data point
00500        * @param parameters A set of parameter values
00501        * @return The conditional probability p(datapoint|parameters)
00502        * @see GetConditionalEntry(BCDataPoint* datapoint, std::vector <double> parameters) */
00503       double ConditionalProbabilityEntry(BCDataPoint * datapoint, std::vector <double> parameters)
00504          { return exp( this->LogConditionalProbabilityEntry(datapoint, parameters) ); };
00505 
00506       /**
00507        * Returns a natural logarithm of conditional probability.
00508        * Method needs to be overloaded by the user.
00509        * @param datapoint A data point
00510        * @param parameters A set of parameter values
00511        * @return The conditional probability p(datapoint|parameters)
00512        * @see GetConditionalEntry(BCDataPoint* datapoint, std::vector <double> parameters) */
00513       virtual double LogConditionalProbabilityEntry(BCDataPoint * /*datapoint*/, std::vector <double> /*parameters*/)
00514          { flag_ConditionalProbabilityEntry = false; return 0.; };
00515 
00516       /**
00517        * Sampling function used for importance sampling.
00518        * Method needs to be overloaded by the user.
00519        * @param parameters A set of parameter values
00520        * @return The probability density at the parameter values */
00521       virtual double SamplingFunction(std::vector <double> parameters);
00522 
00523       /**
00524        * Overloaded function to evaluate integral. */
00525       double Eval(std::vector <double> parameters)
00526          { return exp( this->LogEval(parameters) ); };
00527 
00528       /**
00529        * Overloaded function to evaluate integral. */
00530       double LogEval(std::vector <double> parameters);
00531 
00532       /**
00533        * Overloaded function to evaluate integral. */
00534       double EvalSampling(std::vector <double> parameters);
00535 
00536       /**
00537        * Integrates over the un-normalized probability and updates fNormalization. */
00538       double Normalize();
00539 
00540       /**
00541        * Checks if a set of parameters values is within the given range.
00542        * @param parameters A set of parameter values
00543        * @return Error code (0: OK, -1 length of parameters not correct, -2 values not within range)
00544        */
00545       int CheckParameters(std::vector <double> parameters);
00546 
00547       /**
00548        * Do the mode finding using a method set via SetOptimizationMethod.
00549        * Default is Minuit. The mode can be extracted using the GetBestFitParameters() method.
00550        *
00551        * A starting point for the mode finding can be specified for Minuit. If not
00552        * specified, Minuit default will be used (center of the parameter space).
00553        *
00554        * If running mode finding after the MCMC it is a good idea to specify the
00555        * mode obtained from MCMC as a starting point for the Minuit minimization.
00556        * MCMC will find the location of the global mode and Minuit will
00557        * converge to the mode precisely. The commands are:
00558          <pre>
00559          model -> MarginalizeAll();
00560          model -> FindMode( model -> GetBestFitParameters() );
00561          </pre>
00562        * @start startinf point of Minuit minimization
00563        * */
00564       void FindMode(std::vector<double> start = std::vector<double>(0));
00565 
00566       /**
00567        * Does the mode finding using Minuit. If starting point is not specified,
00568        * finding will start from the center of the parameter space.
00569        * @param start point in parameter space from which the mode finding is started.
00570        * @param printlevel The print level. */
00571       void FindModeMinuit(std::vector<double> start = std::vector<double>(0), int printlevel = 1);
00572 
00573       /**
00574        * Write mode into file */
00575       void WriteMode(const char * file);
00576 
00577       /**
00578        * Read mode from file created by WriteMode() call */
00579       int ReadMode(const char * file);
00580 
00581       /**
00582        * Read */
00583       int ReadMarginalizedFromFile(const char * file);
00584 
00585       /**
00586        * Read */
00587       int ReadErrorBandFromFile(const char * file);
00588 
00589       /**
00590        * Marginalize all probabilities wrt. single parameters and all combinations
00591        * of two parameters. The individual distributions can be retrieved using
00592        * the GetMarginalized method.
00593        * @return Total number of marginalized distributions */
00594       int MarginalizeAll();
00595 
00596       /**
00597        * If MarginalizeAll method was used, the individual marginalized distributions
00598        * with respect to one parameter can be retrieved using this method.
00599        * @param parameter Model parameter
00600        * @return 1D marginalized probability */
00601       BCH1D * GetMarginalized(BCParameter * parameter);
00602 
00603       BCH1D * GetMarginalized(const char * name)
00604          { return this -> GetMarginalized(this -> GetParameter(name)); };
00605 
00606       /**
00607        * If MarginalizeAll method was used, the individual marginalized distributions
00608        * with respect to two parameters can be retrieved using this method.
00609        * @param parameter1 First parameter
00610        * @param parameter2 Second parameter
00611        * @return 2D marginalized probability */
00612       BCH2D * GetMarginalized(BCParameter * parameter1, BCParameter * parameter2);
00613 
00614       BCH2D * GetMarginalized(const char * name1, const char * name2)
00615          { return this -> GetMarginalized(this -> GetParameter(name1), this -> GetParameter(name2)); };
00616 
00617       /**
00618        *   */
00619       int PrintAllMarginalized1D(const char * filebase);
00620       int PrintAllMarginalized2D(const char * filebase);
00621       int PrintAllMarginalized(const char * file, unsigned int hdiv=1, unsigned int ndiv=1);
00622 
00623       /**
00624        * Constrains a data point
00625        * @param x A vector of double */
00626       virtual void CorrelateDataPointValues(std::vector<double> &x);
00627 
00628       /**
00629        * Calculate p-value from Chi2 distribution for Gaussian problems
00630        * @param par Parameter set for the calculation of the likelihood
00631        * @param sigma_index Index of the sigma/uncertainty for the data points
00632        *        (for data in format "x y erry" the index would be 2) */
00633       double GetPvalueFromChi2(std::vector<double> par, int sigma_index);
00634 
00635       /**
00636        * Calculate p-value from asymptotic Chi2 distribution for arbitrary problems
00637        * using the definition (3) from
00638        * Johnson, V.E. A Bayesian chi2 Test for Goodness-of-Fit. The Annals of Statistics 32, 2361-2384(2004).
00639        *
00640        * @param par Parameter set for the calculation of the likelihood */
00641       double GetPvalueFromChi2Johnson(std::vector<double> par);
00642 
00643       /**
00644        * Calculate p-value from Kolmogorov-Smirnov test statistic
00645        * for 1D - datasets.
00646        *
00647        * @param par Parameter set for the calculation of the likelihood
00648        * @param index Index of the data point in the BCDataSet
00649        *        (for data in format "x y erry" the index would be 1) */
00650       double GetPvalueFromKolmogorov(const std::vector<double>& par, int index);
00651 
00652 
00653       /**
00654        * Calculate  Chi2  (also called R^{B}) for arbitrary problems with binned data
00655        * using the definition (3) from
00656        * Johnson, V.E. A Bayesian chi2 Test for Goodness-of-Fit. The Annals of Statistics 32, 2361-2384(2004).
00657        *
00658        * @param par Parameter set for the calculation of the likelihood
00659        * @param nBins how many bins to use for the data, for negative an adapted rule \
00660        * of thumb by  Wald(1942) is used, with at least three bins*/
00661       double GetChi2Johnson(std::vector<double> par, const int nBins=-1);
00662 
00663       /**
00664        * Calculate the A-value, a summary statistic. It computes the frequency
00665        * that a Chi2 value determined from the data by Johnson's binning prescription is
00666        * larger than a value sampled from the reference chi2 distribution. They out
00667        * from one chain is used. A=1/2 provides
00668        * no evidence against the null hypothesis. Compare
00669        * Johnson, V.E. A Bayesian chi2 Test for Goodness-of-Fit. The Annals of Statistics 32, 2361-2384(2004).
00670        *
00671        * @param par tree contains the samples of posterior of the parameters
00672        * @param par histogram filled by function with distribution of p values*/
00673       double GetAvalueFromChi2Johnson(TTree* tree, TH1D* distribution=0);
00674 
00675       double GetPvalueFromChi2NDoF(std::vector<double> par, int sigma_index);
00676 
00677       BCH1D * CalculatePValue(std::vector<double> par, bool flag_histogram=false);
00678 
00679       /*
00680        * @return The p-value */
00681       double GetPValue()
00682          { return fPValue; };
00683 
00684       double GetPValueNDoF()
00685          { return fPValueNDoF; };
00686 
00687       double GetChi2NDoF()
00688          { return fChi2NDoF; };
00689 
00690       /**
00691        * For a Gaussian problem, calculate the chi2 of the longest run of consecutive
00692        * values above/below the expected values
00693        * @param dataIndex component of datapoint with the observed value
00694        * @param sigmaIndex component of datapoint with uncertainty */
00695       std::vector<double> GetChi2Runs(int dataIndex, int sigmaIndex);
00696 
00697       /*
00698        * Set maximum number of iterations in the MCMC pre-run of the p-value
00699        * evaluation using MCMC */
00700       void SetGoFNIterationsMax(int n)
00701          { fGoFNIterationsMax=n; };
00702 
00703       /*
00704        * Set number of iterations in the MCMC normal run of the p-value
00705        * evaluation using MCMC */
00706       void SetGoFNIterationsRun(int n)
00707          { fGoFNIterationsRun=n; };
00708 
00709       /*
00710        * Set number of chains in the MCMC of the p-value
00711        * evaluation using MCMC */
00712       void SetGoFNChains(int n)
00713          { fGoFNChains=n; };
00714 
00715       /**
00716        * Calculates the matrix element of the Hessian matrix
00717        * @param parameter1 The parameter for the first derivative
00718        * @param parameter2 The parameter for the first derivative
00719        * @return The matrix element of the Hessian matrix */
00720       double HessianMatrixElement(BCParameter * parameter1, BCParameter * parameter2, std::vector<double> point);
00721 
00722       /**
00723        * Prints a summary on the screen. */
00724       void PrintSummary();
00725 
00726       /**
00727        * Prints a summary of the Markov Chain Monte Carlo to a file. */
00728       void PrintResults(const char * file);
00729 
00730       /**
00731        * Prints a short summary of the fit results on the screen. */
00732       void PrintShortFitSummary(int chi2flag=0);
00733 
00734       /**
00735        * Prints matrix elements of the Hessian matrix
00736        * @param parameters The parameter values at which point to evaluate the matrix */
00737       void PrintHessianMatrix(std::vector<double> parameters);
00738 
00739       void FixDataAxis(unsigned int index, bool fixed);
00740 
00741 
00742       /**
00743        * 1dim cumulative distribution function of the probability
00744        * to get the data f(x_i|param) for a single measurement, assumed to
00745        * be of identical functional form for all measurements
00746        * @param parameters The parameter values at which point to compute the cdf
00747        * @param index The data point index starting at 0,1...N-1
00748        * @param lower only needed for discrete distributions!
00749        * Return the CDF for the count one less than actually observed, e.g.
00750        * in Poisson process, if 3 actually observed, then CDF(2) is returned */
00751       virtual double CDF(const std::vector<double>& /*parameters*/,  int /*index*/, bool /*lower=false*/)
00752       {return 0.0;}
00753 
00754 
00755       /** 
00756        * Reset all results. 
00757        * @return An error code. */ 
00758       int ResetResults();
00759 
00760    /* @} */
00761 
00762    protected:
00763 
00764       /**
00765        * Index of the model. */
00766       int fIndex;
00767 
00768       /**
00769        * Name of the model. */
00770       std::string fName;
00771 
00772       /**
00773        * The model prior probability. */
00774       double fModelAPriori;
00775 
00776       /**
00777        * The model a posteriori probability. */
00778       double fModelAPosteriori;
00779 
00780       /**
00781        * A model parameter container. */
00782       BCParameterSet * fParameterSet;
00783 
00784       /**
00785        * A data set */
00786       BCDataSet * fDataSet;
00787 
00788       /**
00789        * Minimum number of data points */
00790       unsigned int fNDataPointsMinimum;
00791 
00792       /**
00793        * Maximum number of data points */
00794       unsigned int fNDataPointsMaximum;
00795 
00796       /**
00797        * A flag for overloading ConditionalProbabilityEntry */
00798       bool flag_ConditionalProbabilityEntry;
00799 
00800       /**
00801        * The p-value */
00802       double fPValue;
00803 
00804       double fChi2NDoF;
00805       double fPValueNDoF;
00806 
00807       /**
00808       * true for a discrete probability, false for continuous pdf  */
00809       bool flag_discrete;
00810 
00811       /*
00812        * Maximum number of iterations in the MCMC pre-run of the p-value
00813        * evaluation using MCMC */
00814       int fGoFNIterationsMax;
00815 
00816       /*
00817        * Number of iterations in the MCMC normal run of the p-value
00818        * evaluation using MCMC */
00819       int fGoFNIterationsRun;
00820 
00821       /*
00822        * Number of chains in the MCMC of the p-value
00823        * evaluation using MCMC */
00824       int fGoFNChains;
00825 
00826       /*
00827        * A vector of prior functions/histograms/graphs. */ 
00828 //    std::vector<TF1*> fPriorContainer;
00829       std::vector<TNamed*> fPriorContainer;
00830 
00831       /**
00832        * Flag to indicate that all parameters have constant prior. */
00833       bool fPriorConstantAll;
00834 
00835       /**
00836        * The value of the product of constant priors of
00837        * individual parameters. */
00838       double fPriorConstantValue;
00839 
00840       /**
00841        * List the parameters whose prior is constant */
00842       std::vector<bool> fPriorContainerConstant;
00843 
00844       /**
00845        * List the parameters for which the histogram prior should be interpolated */
00846       std::vector<bool> fPriorContainerInterpolate;
00847 
00848    private:
00849 
00850       /**
00851        * Converts a vector of doubles into a BCDataPoint */
00852       BCDataPoint * VectorToDataPoint(std::vector<double> data);
00853 
00854       /**
00855        * Compares to strings */
00856       int CompareStrings(const char * string1, const char * string2);
00857 
00858       /**
00859        * The Likelihood normalization. */
00860       double fNormalization;
00861 
00862       /**
00863        * rule of thumb for good number of bins (Wald1942, Johnson2004) to group observations
00864        * updated so minimum is three bins (for 1-5 observations)!
00865        * @param */
00866       int NumberBins()
00867          { return (int)(exp(0.4 * log(this -> GetNDataPoints())) + 2); }
00868 
00869       /**
00870        * Update the constant part of the prior. */
00871       void RecalculatePriorConstant();
00872 };
00873 
00874 // ---------------------------------------------------------
00875 
00876 typedef std::vector<BCModel*> BCModelContainer;
00877 
00878 // ---------------------------------------------------------
00879 
00880 #endif