00001 #ifndef __BCMODEL__H 00002 #define __BCMODEL__H 00003 00004 /*! 00005 * \class BCModel 00006 * \brief The base class for all user-defined models. 00007 * \author Daniel Kollar 00008 * \author Kevin Kröninger 00009 * \version 1.0 00010 * \date 08.2008 00011 * \detail This class represents a model. It contains a container of 00012 * parameters, their prior distributions and the conditional 00013 * probabilities given those parameters. The methods which implement 00014 * the prior and conditional probabilities have to be overloaded by 00015 * the user in the user defined model class which will inherit from 00016 * this class. 00017 */ 00018 00019 /* 00020 * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger. 00021 * All rights reserved. 00022 * 00023 * For the licensing terms see doc/COPYING. 00024 */ 00025 00026 // --------------------------------------------------------- 00027 00028 #include <vector> 00029 #include <string> 00030 00031 #include "BAT/BCIntegrate.h" 00032 00033 // ROOT classes 00034 class TH2D; 00035 class TGraph; 00036 class TCanvas; 00037 class TPostscript; 00038 00039 //BAT classes 00040 class BCDataPoint; 00041 class BCDataSet; 00042 class BCParameter; 00043 class BCH1D; 00044 class BCH2D; 00045 00046 const int MAXNDATAPOINTVALUES = 20; 00047 00048 // --------------------------------------------------------- 00049 00050 class BCModel : public BCIntegrate 00051 { 00052 00053 public: 00054 00055 /** \name Constructors and destructors */ 00056 /* @{ */ 00057 00058 /** 00059 * The default constructor. */ 00060 BCModel(); 00061 00062 /** 00063 * A constructor. 00064 * @param name The name of the model */ 00065 BCModel(const char * name); 00066 00067 /** 00068 * The default destructor. */ 00069 virtual ~BCModel(); 00070 00071 /* @} */ 00072 00073 /** \name Member functions (get) */ 00074 /* @{ */ 00075 00076 /** 00077 * @return The name of the model. */ 00078 std::string GetName() 00079 { return fName; }; 00080 00081 /** 00082 * @return The index of the model. */ 00083 int GetIndex() 00084 { return fIndex; }; 00085 00086 /** 00087 * @return The a priori probability. */ 00088 double GetModelAPrioriProbability() 00089 { return fModelAPriori; }; 00090 00091 /** 00092 * @return The a posteriori probability. */ 00093 double GetModelAPosterioriProbability() 00094 { return fModelAPosteriori; }; 00095 00096 /** 00097 * @return The normalization factor of the probability */ 00098 double GetNormalization() 00099 { return fNormalization; }; 00100 00101 /** 00102 * @return The data set. */ 00103 BCDataSet* GetDataSet() 00104 { return fDataSet; }; 00105 00106 /** 00107 * @return The lower boundaries of possible data values. */ 00108 BCDataPoint* GetDataPointLowerBoundaries() 00109 { return fDataPointLowerBoundaries; }; 00110 00111 /** 00112 * @return The upper boundaries of possible data values. */ 00113 BCDataPoint* GetDataPointUpperBoundaries() 00114 { return fDataPointUpperBoundaries; }; 00115 00116 /** 00117 * @param index The index of the variable. 00118 * @return The lower boundary of possible data values for a particular variable. */ 00119 double GetDataPointLowerBoundary(unsigned int index) 00120 { return fDataPointLowerBoundaries -> GetValue(index); }; 00121 00122 /** 00123 * @param index The index of the variable. 00124 * @return The upper boundary of possible data values for a particular variable. */ 00125 double GetDataPointUpperBoundary(unsigned int index) 00126 { return fDataPointUpperBoundaries -> GetValue(index); }; 00127 00128 /* 00129 * Checks if the boundaries have been defined 00130 * @return true, if the boundaries have been set, false otherwise */ 00131 bool GetFlagBoundaries(); 00132 00133 /** 00134 * @return The number of data points in the current data set. */ 00135 int GetNDataPoints(); 00136 00137 /** 00138 * @param index The index of the data point. 00139 * @return The data point in the current data set at index */ 00140 BCDataPoint * GetDataPoint(unsigned int index); 00141 00142 /** 00143 * @return The minimum number of data points. */ 00144 unsigned int GetNDataPointsMinimum() 00145 { return fNDataPointsMinimum; }; 00146 00147 /** 00148 * @return The maximum number of data points. */ 00149 unsigned int GetNDataPointsMaximum() 00150 { return fNDataPointsMaximum; }; 00151 00152 /** 00153 * @return The number of parameters of the model. */ 00154 unsigned int GetNParameters() 00155 { return fParameterSet ? fParameterSet -> size() : 0; }; 00156 00157 /** 00158 * @param index The index of the parameter in the parameter set. 00159 * @return The parameter. */ 00160 BCParameter* GetParameter(int index); 00161 00162 /** 00163 * @param name The name of the parameter in the parameter set. 00164 * @return The parameter. */ 00165 BCParameter * GetParameter(const char * name); 00166 00167 /** 00168 * @return parameter set */ 00169 BCParameterSet * GetParameterSet() 00170 { return fParameterSet; }; 00171 00172 /** 00173 * Returns the value of a particular parameter (defined by index) at 00174 * the global mode of the posterior pdf. 00175 * @param index The index of the parameter. 00176 * @return The best fit parameter. */ 00177 double GetBestFitParameter(unsigned int index) 00178 { return fBestFitParameters[index]; }; 00179 00180 double GetBestFitParameterError(unsigned int index) 00181 { return fBestFitParameterErrors[index]; }; 00182 00183 /** 00184 * Returns the set of values of the parameters at the global mode of 00185 * the posterior pdf. 00186 * @return The best fit parameters */ 00187 std::vector <double> GetBestFitParameters() 00188 { return fBestFitParameters; }; 00189 00190 std::vector <double> GetBestFitParameterErrors() 00191 { return fBestFitParameterErrors; }; 00192 00193 /** 00194 * Returns the value of a particular parameter (defined by index) at 00195 * the modes of the marginalized posterior pdfs. 00196 * @param index The index of the parameter. 00197 * @return The best fit parameter */ 00198 double GetBestFitParameterMarginalized(unsigned int index) 00199 { return fBestFitParametersMarginalized[index]; }; 00200 00201 /** 00202 * Returns the set of values of the parameters at the modes of the 00203 * marginalized posterior pdfs. 00204 * @return The best fit parameters. */ 00205 std::vector <double> GetBestFitParametersMarginalized() 00206 { return fBestFitParametersMarginalized; }; 00207 00208 /** 00209 * @return The 2-d histogram of the error band. */ 00210 TH2D * GetErrorBandXY() 00211 { return fErrorBandXY; }; 00212 00213 TH2D * GetErrorBandXY_yellow(double level=.68, int nsmooth=0); 00214 00215 /** 00216 * Returns a vector of y-values at a certain probability level. 00217 * @param level The level of probability 00218 * @return The vector of y-values */ 00219 std::vector <double> GetErrorBand(double level); 00220 00221 TGraph * GetErrorBandGraph(double level1, double level2); 00222 00223 TGraph * GetFitFunctionGraph(std::vector <double> parameters); 00224 00225 TGraph * GetFitFunctionGraph() 00226 { return this -> GetFitFunctionGraph(this -> GetBestFitParameters()); }; 00227 00228 TGraph * GetFitFunctionGraph(std::vector <double> parameters, double xmin, double xmax, int n=1000); 00229 00230 bool GetFixedDataAxis(unsigned int index); 00231 00232 /* @} */ 00233 00234 /** \name Member functions (set) */ 00235 /* @{ */ 00236 00237 /** 00238 * Sets the name of the model. 00239 * @param name Name of the model */ 00240 void SetName(const char * name) 00241 { fName = name; }; 00242 00243 /** 00244 * Sets the index of the model within the BCModelManager. 00245 * @param index The index of the model */ 00246 void SetIndex(int index) 00247 { fIndex = index; }; 00248 00249 /** 00250 * Set all parameters of the model using a BCParameterSet container. 00251 * @par pointer to parameter set */ 00252 void SetParameterSet( BCParameterSet * parset ) 00253 { fParameterSet = parset; }; 00254 00255 /** 00256 * Sets the a priori probability for a model. 00257 * @param model The model 00258 * @param probability The a priori probability */ 00259 void SetModelAPrioriProbability(double probability) 00260 { fModelAPriori = probability; }; 00261 00262 /** 00263 * Sets the a posteriori probability for a model. 00264 * @param model The model 00265 * @param probability The a posteriori probability */ 00266 void SetModelAPosterioriProbability(double probability) 00267 { fModelAPosteriori = probability; }; 00268 00269 /** 00270 * Sets the normalization of the likelihood. 00271 * The normalization is the integral of likelihood over all parameters. 00272 * @param norm The normalization of the likelihood */ 00273 void SetNormalization(double norm) 00274 { fNormalization = norm; }; 00275 00276 /** 00277 * Sets the data set. 00278 * @param dataset A data set */ 00279 void SetDataSet(BCDataSet* dataset) 00280 { fDataSet = dataset; fNormalization = -1.0; }; 00281 00282 /** 00283 * Sets a single data point as data set. 00284 * @param datapoint A data point */ 00285 void SetSingleDataPoint(BCDataPoint * datapoint); 00286 00287 void SetSingleDataPoint(BCDataSet * dataset, unsigned int index); 00288 00289 /** 00290 * Sets the minimum number of data points. */ 00291 void SetNDataPointsMinimum(unsigned int minimum) 00292 { fNDataPointsMinimum = minimum; }; 00293 00294 /** 00295 * Sets the maximum number of data points. */ 00296 void SetNDataPointsMaximum(unsigned int maximum) 00297 { fNDataPointsMaximum = maximum; }; 00298 00299 void SetDataBoundaries(unsigned int index, double lowerboundary, double upperboundary, bool fixed=false); 00300 00301 /** 00302 * Sets the error band flag to continuous function */ 00303 void SetErrorBandContinuous(bool flag); 00304 00305 /** 00306 * Set the number of bins for the marginalized distribution of a parameter. 00307 * @param parname The name of the parameter in the parameter set 00308 * @param nbins Number of bins (default = 100) */ 00309 void SetNbins(const char * parname, int nbins); 00310 00311 /* @} */ 00312 00313 /** \name Member functions (miscellaneous methods) */ 00314 /* @{ */ 00315 00316 /** 00317 * Adds a parameter to the parameter set 00318 * @param name The name of the parameter 00319 * @param lowerlimit The lower limit of the parameter values 00320 * @param upperlimit The upper limit of the parameter values 00321 * @see AddParameter(BCParameter* parameter); */ 00322 int AddParameter(const char * name, double lowerlimit, double upperlimit); 00323 00324 /** 00325 * Adds a parameter to the model. 00326 * @param parameter A model parameter 00327 * @see AddParameter(const char * name, double lowerlimit, double upperlimit); */ 00328 int AddParameter(BCParameter* parameter); 00329 00330 /** 00331 * Returns the prior probability. 00332 * @param parameters A set of parameter values 00333 * @return The prior probability p(parameters) 00334 * @see GetPrior(std::vector <double> parameters) */ 00335 double APrioriProbability(std::vector <double> parameters) 00336 { return exp( this->LogAPrioriProbability(parameters) ); }; 00337 00338 /** 00339 * Returns natural logarithm of the prior probability. 00340 * Method needs to be overloaded by the user. 00341 * @param parameters A set of parameter values 00342 * @return The prior probability p(parameters) 00343 * @see GetPrior(std::vector <double> parameters) */ 00344 virtual double LogAPrioriProbability(std::vector <double> parameters) 00345 { return 0.; }; 00346 00347 /** 00348 * Returns the likelihood 00349 * @param parameters A set of parameter values 00350 * @return The likelihood */ 00351 double Likelihood(std::vector <double> parameter) 00352 { return exp( this->LogLikelihood(parameter) ); }; 00353 00354 /** 00355 * Calculates natural logarithm of the likelihood. 00356 * Method needs to be overloaded by the user. 00357 * @param parameters A set of parameter values 00358 * @return Natural logarithm of the likelihood */ 00359 virtual double LogLikelihood(std::vector <double> parameter); 00360 00361 /** 00362 * Returns the likelihood times prior probability given a set of parameter values 00363 * @param parameters A set of parameter values 00364 * @return The likelihood times prior probability */ 00365 double ProbabilityNN(std::vector <double> parameter) 00366 { return exp( this->LogProbabilityNN(parameter) ); }; 00367 00368 /** 00369 * Returns the natural logarithm of likelihood times prior probability given 00370 * a set of parameter values 00371 * @param parameters A set of parameter values 00372 * @return The likelihood times prior probability */ 00373 double LogProbabilityNN(std::vector <double> parameter); 00374 00375 /** 00376 * Returns the a posteriori probability given a set of parameter values 00377 * @param parameters A set of parameter values 00378 * @return The a posteriori probability */ 00379 double Probability(std::vector <double> parameter) 00380 { return exp( this->LogProbability(parameter) ); }; 00381 00382 /** 00383 * Returns natural logarithm of the a posteriori probability given a set of parameter values 00384 * @param parameters A set of parameter values 00385 * @return The a posteriori probability */ 00386 double LogProbability(std::vector <double> parameter); 00387 00388 /** 00389 * Returns a conditional probability. 00390 * Method needs to be overloaded by the user. 00391 * @param datapoint A data point 00392 * @param parameters A set of parameter values 00393 * @return The conditional probability p(datapoint|parameters) 00394 * @see GetConditionalEntry(BCDataPoint* datapoint, std::vector <double> parameters) */ 00395 double ConditionalProbabilityEntry(BCDataPoint * datapoint, std::vector <double> parameters) 00396 { return exp( this->LogConditionalProbabilityEntry(datapoint, parameters) ); }; 00397 00398 /** 00399 * Returns a natural logarithm of conditional probability. 00400 * Method needs to be overloaded by the user. 00401 * @param datapoint A data point 00402 * @param parameters A set of parameter values 00403 * @return The conditional probability p(datapoint|parameters) 00404 * @see GetConditionalEntry(BCDataPoint* datapoint, std::vector <double> parameters) */ 00405 virtual double LogConditionalProbabilityEntry(BCDataPoint * datapoint, std::vector <double> parameters) 00406 { flag_ConditionalProbabilityEntry = false; return 0.; }; 00407 00408 /** 00409 * Sampling function used for importance sampling. 00410 * Method needs to be overloaded by the user. 00411 * @param parameters A set of parameter values 00412 * @return The probability density at the parameter values */ 00413 virtual double SamplingFunction(std::vector <double> parameters); 00414 00415 /** 00416 * Overloaded function to evaluate integral. */ 00417 double Eval(std::vector <double> parameters) 00418 { return exp( this->LogEval(parameters) ); }; 00419 00420 /** 00421 * Overloaded function to evaluate integral. */ 00422 double LogEval(std::vector <double> parameters); 00423 00424 /** 00425 * Overloaded function to evaluate integral. */ 00426 double EvalSampling(std::vector <double> parameters); 00427 00428 /** 00429 * Integrates over the un-normalized probability and updates fNormalization. */ 00430 double Normalize(); 00431 00432 /** 00433 * Checks if a set of parameters values is within the given range. 00434 * @param parameters A set of parameter values 00435 * @return Error code (0: OK, -1 length of parameters not correct, -2 values not within range) 00436 */ 00437 int CheckParameters(std::vector <double> parameters); 00438 00439 /** 00440 * Do the mode finding using a method set via SetOptimizationMethod. 00441 * Default is Minuit. The mode can be extracted using the GetBestFitParameters() method. 00442 * 00443 * A starting point for the mode finding can be specified for Minuit. If not 00444 * specified, Minuit default will be used (center of the parameter space). 00445 * 00446 * If running mode finding after the MCMC it is a good idea to specify the 00447 * mode obtained from MCMC as a starting point for the Minuit minimization. 00448 * MCMC will find the location of the global mode and Minuit will 00449 * converge to the mode precisely. The commands are: 00450 <pre> 00451 model -> MarginalizeAll(); 00452 model -> FindMode( model -> GetBestFitParameters() ); 00453 </pre> 00454 * @start startinf point of Minuit minimization 00455 * */ 00456 void FindMode(std::vector<double> start = std::vector<double>(0)); 00457 00458 /** 00459 * Does the mode finding using Minuit. If starting point is not specified, 00460 * finding will start from the center of the parameter space. 00461 * @param start point in parameter space from which the mode finding is started. 00462 * @param printlevel The print level. */ 00463 void FindModeMinuit(std::vector<double> start = std::vector<double>(0), int printlevel = 1); 00464 00465 /** 00466 * Write mode into file */ 00467 void WriteMode(const char * file); 00468 00469 /** 00470 * Read mode from file created by WriteMode() call */ 00471 int ReadMode(const char * file); 00472 00473 /** 00474 * Read */ 00475 int ReadMarginalizedFromFile(const char * file); 00476 00477 /** 00478 * Read */ 00479 int ReadErrorBandFromFile(const char * file); 00480 00481 /** 00482 * Marginalize all probabilities wrt. single parameters and all combinations 00483 * of two parameters. The individual distributions can be retrieved using 00484 * the GetMarginalized method. 00485 * @return Total number of marginalized distributions */ 00486 int MarginalizeAll(); 00487 00488 /** 00489 * If MarginalizeAll method was used, the individual marginalized distributions 00490 * with respect to one parameter can be retrieved using this method. 00491 * @param parameter Model parameter 00492 * @return 1D marginalized probability */ 00493 BCH1D * GetMarginalized(BCParameter * parameter); 00494 00495 BCH1D * GetMarginalized(const char * name) 00496 { return this -> GetMarginalized(this -> GetParameter(name)); }; 00497 00498 /** 00499 * If MarginalizeAll method was used, the individual marginalized distributions 00500 * with respect to otwo parameters can be retrieved using this method. 00501 * @param parameter1 First parameter 00502 * @param parameter2 Second parameter 00503 * @return 2D marginalized probability */ 00504 BCH2D * GetMarginalized(BCParameter * parameter1, BCParameter * parameter2); 00505 00506 BCH2D * GetMarginalized(const char * name1, const char * name2) 00507 { return this -> GetMarginalized(this -> GetParameter(name1), this -> GetParameter(name2)); }; 00508 00509 /** 00510 * */ 00511 int PrintAllMarginalized1D(const char * filebase); 00512 int PrintAllMarginalized2D(const char * filebase); 00513 int PrintAllMarginalized(const char * file, unsigned int hdiv=1, unsigned int ndiv=1); 00514 00515 /** 00516 * Constrains a data point 00517 * @param x A vector of double */ 00518 virtual void CorrelateDataPointValues(std::vector<double> &x); 00519 00520 /** 00521 * Calculate p-value from Chi2 distribution for Gaussian problems 00522 * @param par Parameter set for the calculation of the likelihood 00523 * @param sigma_index Index of the sigma/uncertainty for the data points 00524 * (for data in format "x y erry" the index would be 2) */ 00525 double GetPvalueFromChi2(std::vector<double> par, int sigma_index); 00526 00527 double GetPvalueFromChi2NDoF(std::vector<double> par, int sigma_index); 00528 00529 BCH1D * CalculatePValue(std::vector<double> par, bool flag_histogram=false); 00530 00531 /* 00532 * @return The p-value */ 00533 double GetPValue() 00534 { return fPValue; }; 00535 00536 double GetPValueNDoF() 00537 { return fPValueNDoF; }; 00538 00539 double GetChi2NDoF() 00540 { return fChi2NDoF; }; 00541 00542 /* 00543 * Set maximum number of iterations in the MCMC pre-run of the p-value 00544 * evaluation using MCMC */ 00545 void SetGoFNIterationsMax(int n) 00546 { fGoFNIterationsMax=n; }; 00547 00548 /* 00549 * Set number of iterations in the MCMC normal run of the p-value 00550 * evaluation using MCMC */ 00551 void SetGoFNIterationsRun(int n) 00552 { fGoFNIterationsRun=n; }; 00553 00554 /* 00555 * Set number of chains in the MCMC of the p-value 00556 * evaluation using MCMC */ 00557 void SetGoFNChains(int n) 00558 { fGoFNChains=n; }; 00559 00560 /** 00561 * Calculates the matrix element of the Hessian matrix 00562 * @param parameter1 The parameter for the first derivative 00563 * @param parameter2 The parameter for the first derivative 00564 * @return The matrix element of the Hessian matrix */ 00565 double HessianMatrixElement(BCParameter * parameter1, BCParameter * parameter2, std::vector<double> point); 00566 00567 /** 00568 * Prints a summary on the screen. */ 00569 void PrintSummary(); 00570 00571 /** 00572 * Prints a summary of the Markov Chain Monte Carlo to a file. */ 00573 void PrintResults(const char * file); 00574 00575 /** 00576 * Prints a short summary of the fit results on the screen. */ 00577 void PrintShortFitSummary(int chi2flag=0); 00578 00579 /** 00580 * Prints matrix elements of the Hessian matrix 00581 * @param parameters The parameter values at which point to evaluate the matrix */ 00582 void PrintHessianMatrix(std::vector<double> parameters); 00583 00584 void FixDataAxis(unsigned int index, bool fixed); 00585 00586 /* @} */ 00587 00588 protected: 00589 00590 /** 00591 * Index of the model. */ 00592 int fIndex; 00593 00594 /** 00595 * Name of the model. */ 00596 std::string fName; 00597 00598 /** 00599 * The model prior probability. */ 00600 double fModelAPriori; 00601 00602 /** 00603 * The model a posteriori probability. */ 00604 double fModelAPosteriori; 00605 00606 /** 00607 * A model parameter container. */ 00608 BCParameterSet * fParameterSet; 00609 00610 /** 00611 * A data set */ 00612 BCDataSet * fDataSet; 00613 00614 /** 00615 * Minimum number of data points */ 00616 unsigned int fNDataPointsMinimum; 00617 00618 /** 00619 * Maximum number of data points */ 00620 unsigned int fNDataPointsMaximum; 00621 00622 /** 00623 * A flag for overloading ConditionalProbabilityEntry */ 00624 bool flag_ConditionalProbabilityEntry; 00625 00626 /** 00627 * The p-value */ 00628 double fPValue; 00629 00630 double fChi2NDoF; 00631 double fPValueNDoF; 00632 00633 /* 00634 * Maximum number of iterations in the MCMC pre-run of the p-value 00635 * evaluation using MCMC */ 00636 int fGoFNIterationsMax; 00637 00638 /* 00639 * Number of iterations in the MCMC normal run of the p-value 00640 * evaluation using MCMC */ 00641 int fGoFNIterationsRun; 00642 00643 /* 00644 * Number of chains in the MCMC of the p-value 00645 * evaluation using MCMC */ 00646 int fGoFNChains; 00647 00648 private: 00649 00650 /** 00651 * Converts a vector of doubles into a BCDataPoint */ 00652 BCDataPoint * VectorToDataPoint(std::vector<double> data); 00653 00654 /** 00655 * Compares to strings */ 00656 int CompareStrings(const char * string1, const char * string2); 00657 00658 /** 00659 * The Likelihood normalization. */ 00660 double fNormalization; 00661 00662 }; 00663 00664 // --------------------------------------------------------- 00665 00666 typedef std::vector<BCModel*> BCModelContainer; 00667 00668 // --------------------------------------------------------- 00669 00670 #endif