• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BATCalculator.cxx

Go to the documentation of this file.
00001 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke, Stefan A. Schmitz
00002 
00003 #include <TAxis.h>
00004 #include <TFile.h>
00005 #include <TCanvas.h>
00006 #include <TVector.h>
00007 
00008 #include <RooAbsFunc.h>
00009 //#include <RooAbsReal.h>
00010 #include <RooRealVar.h>
00011 //#include <RooArgSet.h>
00012 #include <RooBrentRootFinder.h>
00013 #include <RooFormulaVar.h>
00014 #include <RooGenericPdf.h>
00015 //#include <RooPlot.h>
00016 #include <RooProdPdf.h>
00017 #include <RooDataHist.h>
00018 //#include <RooAbsPdf.h>
00019 #include <RooHistPdf.h>
00020 
00021 #include <RooStats/MarkovChain.h>
00022 
00023 #include "BCLog.h"
00024 #include "BCAux.h"
00025 #include "BCH1D.h"
00026 
00027 #include "BCRooInterface.h"
00028 #include "BATCalculator.h"
00029 
00030 #include <algorithm>
00031 #include <cassert>
00032 
00033 
00034 
00035 ClassImp(RooStats::BATCalculator)
00036 
00037 namespace RooStats {
00038 
00039 // ---------------------------------------------------------
00040 BATCalculator::BATCalculator()
00041    : fData(0)
00042    , fPdf(0)
00043    , fPrior(0)
00044    , fProductPdf(0)
00045    , fLogLike(0)
00046    , fLikelihood(0)
00047    , fIntegratedLikelihood(0)
00048    , fPosteriorPdf(0)
00049    , fLower(0)
00050    , fUpper(0)
00051    , fBrfPrecision(0.00005)
00052    , fValidInterval(false)
00053    , fSize(0.05)
00054    , fLeftSideFraction(0.5)
00055 {
00056    // default constructor
00057    _myRooInterface = new BCRooInterface();
00058 }
00059 
00060 // ---------------------------------------------------------
00061 BATCalculator::BATCalculator( /* const char * name,  const char * title, */
00062                               RooAbsData & data,
00063                               RooAbsPdf  & pdf,
00064                               RooArgSet  & POI,
00065                               RooAbsPdf  & prior,
00066                               RooArgSet  * params,
00067                               bool fillChain)
00068    : fData(&data)
00069    , fPdf(&pdf)
00070    , fPOI(POI)
00071    , fPrior(&prior)
00072    , fparams(params)
00073    , fProductPdf(0)
00074    , fLogLike(0)
00075    , fLikelihood(0)
00076    , fIntegratedLikelihood(0)
00077    , fPosteriorPdf(0)
00078    , fLower(0)
00079    , fUpper(0)
00080    , fBrfPrecision(0.00005)
00081    , fValidInterval(false)
00082    , _nMCMC(1000000)
00083    , fSize(0.05)
00084    , fLeftSideFraction(0.5)
00085    //, TNamed( TString(name), TString(title) )
00086 {
00087    // constructor
00088    //if (nuisanceParameters)
00089    //   fNuisanceParameters.add(*nuisanceParameters);
00090    _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT",fillChain);
00091 }
00092 
00093 // ---------------------------------------------------------
00094 BATCalculator::BATCalculator( RooAbsData & data, ModelConfig & model, bool fillChain)
00095    : fData(&data)
00096    , fPdf(model.GetPdf())
00097    , fPOI(*model.GetParametersOfInterest())
00098    , fPrior(model.GetPriorPdf())
00099    , fparams(model.GetNuisanceParameters())
00100    , fProductPdf(0)
00101    , fLogLike(0)
00102    , fLikelihood(0)
00103    , fIntegratedLikelihood(0)
00104    , fPosteriorPdf(0)
00105    , fLower(0)
00106    , fUpper(0)
00107    , fBrfPrecision(0.00005)
00108    , fValidInterval(false)
00109    , _nMCMC(1000000)
00110    , fSize(0.05)
00111    , fLeftSideFraction(0.5) 
00112 {
00113    cout << "BATCalculator calling constructor ..." << endl;
00114    // constructor from Model Config
00115  //  SetModel(model);
00116    _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT",fillChain);
00117    cout << "BATCalculator constructed" << endl;
00118 }
00119 
00120 // ---------------------------------------------------------
00121 BATCalculator::~BATCalculator()
00122 {
00123    cout << "BATCalculator calling destructor ..." << endl;
00124    // destructor
00125    ClearAll();
00126    delete _myRooInterface;
00127 }
00128 
00129 // ---------------------------------------------------------
00130 void BATCalculator::ClearAll() const
00131 {
00132    // clear cached pdf objects
00133    if (fProductPdf)
00134       delete fProductPdf;
00135    if (fLogLike)
00136       delete fLogLike;
00137    if (fLikelihood)
00138       delete fLikelihood;
00139    if (fIntegratedLikelihood)
00140       delete fIntegratedLikelihood;
00141    if (fPosteriorPdf)
00142       delete fPosteriorPdf;
00143    fPosteriorPdf = 0;
00144    fProductPdf = 0;
00145    fLogLike = 0;
00146    fLikelihood = 0;
00147    fIntegratedLikelihood = 0;
00148    fLower = 0;
00149    fUpper = 0;
00150    fValidInterval = false;
00151 }
00152 
00153 // ---------------------------------------------------------
00154 void BATCalculator::SetModel(const ModelConfig & model)
00155 {
00156    // set the model
00157    //fPdf = model.GetPdf();
00158    //fPriorPOI =  model.GetPriorPdf();
00159    // assignment operator = does not do a real copy the sets (must use add method)
00160    //fPOI.removeAll();
00161    //*fparams.removeAll();
00162    //if (model.GetParametersOfInterest())
00163    //   fPOI.add( *(model.GetParametersOfInterest()) );
00164    //if (model.GetNuisanceParameters())
00165    //   fNuisanceParameters.add( *(model.GetNuisanceParameters() ) );
00166 
00167    // invalidate the cached pointers
00168    //ClearAll();
00169 }
00170 
00171 // ---------------------------------------------------------
00172 RooArgSet * BATCalculator::GetMode(RooArgSet * /* parameters */) const
00173 {
00176   // Should cover multi-dimensional cases...
00177   // How do we do if there are points that are equi-probable?
00178 
00179   return 0;
00180 }
00181 
00182 // ---------------------------------------------------------
00183 //returns posterior with respect to first entry in the POI ArgSet
00184 RooAbsPdf * BATCalculator::GetPosteriorPdf1D() const
00185 {
00186    const char * POIname = fPOI.first()->GetName();
00187    return GetPosteriorPdf1D(POIname);
00188 }
00189 
00190 // ---------------------------------------------------------
00191 // returns posterior with respect to provided name in the POI ArgSet
00192 RooAbsPdf * BATCalculator::GetPosteriorPdf1D(const char * POIname) const
00193 {
00194 
00195    // run some sanity checks
00196    if (!fPdf ) {
00197      std::cerr << "BATCalculator::GetPosteriorPdf - missing pdf model" << std::endl;
00198      return 0;
00199    }
00200 
00201    if (!fPrior) {
00202       std::cerr << "BATCalculator::GetPosteriorPdf - missing prior pdf" << std::endl;
00203    }
00204 
00205    if (fPOI.getSize() == 0) {
00206      std::cerr << "BATCalculator::GetPosteriorPdf - missing parameter of interest" << std::endl;
00207      return 0;
00208    }
00209 
00210    if (fPOI.getSize() > 1) {
00211       std::cerr << "BATCalculator::GetPosteriorPdf - current implementation works only on 1D intervals" << std::endl;
00212       return 0;
00213    }
00214 
00215     //initialize RooInterface object
00216    _myRooInterface->Initialize(*fData,*fPdf,*fPrior,fparams,fPOI);
00217    _myRooInterface->MCMCSetNIterationsRun(_nMCMC);
00218    _myRooInterface->MarginalizeAll();
00219    _myRooInterface->FindMode();
00220    BCParameter * myPOI = _myRooInterface->GetParameter(POIname);
00221    BCH1D * myPosterior =_myRooInterface->GetMarginalized(myPOI);
00222    TH1D * posteriorTH1D = myPosterior->GetHistogram();
00223    _posteriorTH1D = static_cast<TH1D *>(posteriorTH1D->Clone("_posteriorTH1D"));
00224    RooDataHist * posteriorRooDataHist = new RooDataHist("posteriorhist","", fPOI,posteriorTH1D);
00225    fPosteriorPdf = new RooHistPdf("posteriorPdf","",fPOI,*posteriorRooDataHist);
00226 
00227    /*
00228    // plots for debugging
00229    TFile * debugfile = new TFile( "debug_posterior.root" ,"RECREATE");
00230    if ( debugfile->IsOpen() )
00231       cout << "File debug_posterior opened successfully" << endl;
00232    debugfile->cd();
00233    //posteriorTH1D->Write("posteriorTH1D");
00234    //posteriorRooDataHist->Write("posteriorRooDataHist");
00235    fPosteriorPdf->Write("fPosteriorPdf");
00236    TCanvas myCanvas("canvasforfposterior","canvas for fposterior");
00237    myCanvas.cd();
00238    //fPosteriorPdf->Draw();
00239    //myCanvas.Write();
00240    RooRealVar nSig("nSig","",0.0001,200.);
00241    RooPlot * pl = nSig.frame();
00242    fPosteriorPdf->plotOn(pl);
00243    pl->Draw();
00244    pl->Write();
00245    myCanvas.Write();
00246    debugfile->Close();
00247 
00248    // just for testing: create plots with BAT
00249    char * outputFile = "bat_plots_debugging.ps";
00250    _myRooInterface -> PrintAllMarginalized(outputFile);
00251    //*/
00252 
00253    return fPosteriorPdf; // is of type RooAbsPdf*
00254 }
00255 
00256 // ---------------------------------------------------------
00257 // return a RooPlot with the posterior PDF and the credibility region
00258 RooPlot * BATCalculator::GetPosteriorPlot1D() const
00259 {
00260   
00261 
00262    if (!fPosteriorPdf)
00263       GetPosteriorPdf1D();
00264    if (!fValidInterval)
00265       GetInterval1D();
00266 
00267    RooAbsRealLValue * poi = dynamic_cast<RooAbsRealLValue *>( fPOI.first() );
00268    assert(poi);
00269 
00270    RooPlot* plot = poi->frame();
00271 
00272    plot->SetTitle(TString("Posterior probability of parameter \"")+TString(poi->GetName())+TString("\""));
00273    fPosteriorPdf->plotOn(plot,RooFit::Range(fLower,fUpper,kFALSE),RooFit::VLines(),RooFit::DrawOption("F"),RooFit::MoveToBack(),RooFit::FillColor(kGray));
00274    fPosteriorPdf->plotOn(plot);
00275    plot->GetYaxis()->SetTitle("posterior probability");
00276 
00277    return plot;
00278 }
00279 
00280 // ---------------------------------------------------------
00281 // returns central interval for first POI in the POI ArgSet
00282 SimpleInterval * BATCalculator::GetInterval1D() const
00283 {
00284    const char * POIname = fPOI.first()->GetName();
00285    return GetInterval1D(POIname); //is of type RooAbsPdf *
00286 }
00287 
00288 // ---------------------------------------------------------
00289 // returns central interval for requested POI
00290 SimpleInterval * BATCalculator::GetInterval1D(const char * POIname) const
00291 {
00296 
00297    if (fValidInterval)
00298       std::cout << "BATCalculator::GetInterval1D:"
00299                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00300 
00301    RooRealVar * poi = dynamic_cast<RooRealVar *>( fPOI.find(POIname) );
00302    assert(poi);
00303 
00304    if (!fPosteriorPdf)
00305       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00306 
00307    RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00308    //RooAbsReal* cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanNoCdf());
00309    RooAbsFunc * cdf_bind = cdf->bindVars(fPOI,&fPOI);
00310    RooBrentRootFinder brf(*cdf_bind);
00311    brf.setTol(fBrfPrecision); // set the brf precision
00312 
00313    double tmpVal = poi->getVal();  // patch used because findRoot changes the value of poi
00314 
00315    double y = fSize*fLeftSideFraction; //adjust lower tail prob. according to fLeftSideFraction
00316 
00317    brf.findRoot(fLower,poi->getMin(),poi->getMax(),y);
00318 
00319    y = 1.-(fSize*(1.-fLeftSideFraction) ); //adjust upper tail prob. according to fLeftSideFraction
00320 
00321    bool ret = brf.findRoot(fUpper,poi->getMin(),poi->getMax(),y);
00322    if (!ret)
00323       std::cout << "BATCalculator::GetInterval1D: Warning:"
00324                 << "Error returned from Root finder, estimated interval is not fully correct"
00325                 << std::endl;
00326 
00327    poi->setVal(tmpVal); // patch: restore the original value of poi
00328 
00329    delete cdf_bind;
00330    delete cdf;
00331    fValidInterval = true;
00332    fConnectedInterval = true;
00333 
00334    TString interval_name = TString("CentralBayesianInterval_a") + TString(this->GetName());
00335    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00336    interval->SetTitle("SimpleInterval from BATCalculator");
00337 
00338    return interval;
00339 }
00340 
00341 // ---------------------------------------------------------
00342 SimpleInterval * BATCalculator::GetShortestInterval1D() const
00343 {
00344    const char * POIname = fPOI.first()->GetName();
00345    bool checkConnected = true;
00346    return GetShortestInterval1D(POIname, checkConnected);
00347 }
00348 
00349 // ---------------------------------------------------------
00350 // returns a SimpleInterval with lower and upper bounds on the
00351 // parameter of interest. Applies the shortest interval rule to
00352 // compute the credibility interval. The resulting interval is not necessarily connected.
00353 // Methods for constructing the shortest region with respect given CL are now available in
00354 // different places (here; MCMCCalculator, BAT)-> this might require cleaning at some point.
00355 // Result is approximate as CL is not reached exactly (due to finite bin number/bin size in posterior histogram)-> make CL/interval a bit smaller or bigger ?
00356 SimpleInterval * BATCalculator::GetShortestInterval1D(const char * POIname, bool & checkConnected) const
00357 {
00358 
00359    if (fValidInterval)
00360       std::cout << "BATCalculator::GetShortestInterval1D:"
00361                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00362 
00363    // get pointer to selected parameter of interest
00364    RooRealVar * poi = dynamic_cast<RooRealVar*>( fPOI.find(POIname) );
00365    assert(poi);
00366 
00367    // get pointer to poserior pdf
00368    if (!fPosteriorPdf)
00369       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00370    //RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00371 
00372    // range of poi
00373    Double_t minpoi = poi->getMin();
00374    Double_t maxpoi = poi->getMax();
00375 
00376    // bin number of histogram for posterior
00377    Int_t stepnumber = _posteriorTH1D->GetNbinsX();
00378    cout << "stepnumber is: " << stepnumber << endl;
00379 
00380    // width of one bin in histogram of posterior
00381    Double_t stepsize = (maxpoi-minpoi)/stepnumber;
00382    cout << "stepsize is: " << stepsize << endl;
00383 
00384    // pair: first entry: bin number , second entry: value of posterior
00385    vector < pair< Int_t,Double_t > > posteriorVector;
00386 
00387    // for normalization
00388    Double_t histoIntegral = 0;
00389    // give posteriorVector the right length
00390    posteriorVector.resize(stepnumber);
00391 
00392    // see in BayesianCalculator for details about this "feature"
00393    Double_t tmpVal = poi->getVal();
00394 
00395    // initialize elements of posteriorVector
00396    int i = 0;
00397    vector < pair< Int_t,Double_t > >::iterator vecit = posteriorVector.begin();
00398    vector < pair< Int_t,Double_t > >::iterator vecit_end = posteriorVector.end();
00399    for( ; vecit != vecit_end ; ++vecit) {
00400       poi->setVal(poi->getMin()+i*stepsize);
00401       posteriorVector[i] = make_pair(i, _posteriorTH1D->GetBinContent(i+1) ); // hope this is working, +1 necessary, because GetBinContent(0) returns the underflow bin
00402       histoIntegral+=_posteriorTH1D->GetBinContent(i); // better to get this directly from the histogram ?!
00403       //cout << "pair with binnumber: " << i << " and postriorprob: " << _posteriorTH1D->GetBinContent(i+1) << endl;
00404       i++;
00405    }
00406 
00407    cout << "histoIntegral is: " << histoIntegral << endl;
00408 
00409    // sort posteriorVector with respect to posterior pdf
00410    std::sort(posteriorVector.begin(), posteriorVector.end(), sortbyposterior);
00411 
00412    // keep track of integrated posterior in the interval
00413    Double_t integratedposterior = 0.;
00414 
00415    // keep track of lowerLim and upperLim
00416    Double_t lowerLim=posteriorVector.size();
00417    Double_t upperLim=0;
00418 
00419    // store the bins in the intervall
00420    vector<bool> inInterval;
00421    inInterval.resize(posteriorVector.size());
00422 
00423    // set all values in inInterval to false
00424    for (unsigned int k = 0; k < inInterval.size(); k++)
00425       inInterval[k] = false;
00426 
00427    unsigned int j = 0;
00428    // add bins to interval while CL not reached
00429    //cout << "integratedposterior: " << integratedposterior << "histoIntegral: " << histoIntegral << endl;
00430    //cout << " integratedposterior/histoIntegral : " << integratedposterior/histoIntegral << endl;
00431    //cout << " 1-fSize : " << 1-fSize << endl;
00432    //cout << " posteriorVector.size(): " << posteriorVector.size() << endl;
00433    while(((integratedposterior/histoIntegral) < (1-fSize)) && (j < posteriorVector.size())) {
00434       //cout << "j is: " << j << " , int prob: " << integratedposterior/histoIntegral << endl;
00435       integratedposterior+=posteriorVector[j].second;
00436 
00437       //cout << "bin number: " << posteriorVector[j].first << " with posterior prob.: " << posteriorVector[j].second << endl;
00438 
00439       // update vector with bins included in the interval
00440       inInterval[posteriorVector[j].first] = true;
00441 
00442       if(posteriorVector[j].first < lowerLim) {
00443          lowerLim = posteriorVector[j].first; // update lowerLim
00444          cout << "updating lower lim to: " << lowerLim << endl;
00445       }
00446       if(posteriorVector[j].first > upperLim) {
00447          upperLim = posteriorVector[j].first; // update upperLim
00448          cout << "updating upper lim to: " << upperLim << endl;
00449       }
00450 
00451       fLower = lowerLim * stepsize; // 
00452       fUpper = upperLim * stepsize;
00453       j++;
00454    }
00455 
00456    // initialize vector with interval borders
00457 
00458    bool runInside = false;
00459    for (unsigned int l = 0; l < inInterval.size(); l++) {
00460       if ( (runInside == false) && (inInterval[l] == true) ) {
00461          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00462          runInside = true;
00463       }
00464       if ( ( runInside == true) && (l < (inInterval.size()-1) ) && (inInterval[l+1] == false) ) {
00465          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00466          runInside = false;
00467       }
00468       if ( ( runInside == true) && (l == (inInterval.size()-1)) ) {
00469          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00470       }
00471    }
00472 
00473    // check if the intervall is connected
00474    if(checkConnected) {
00475       if (_intervalBorders1D.size() > 2) {
00476          fConnectedInterval = false;
00477       }
00478       else {
00479          fConnectedInterval = true;
00480       }
00481    }
00482 
00483    poi->setVal(tmpVal); // patch: restore the original value of poi
00484 
00485    fValidInterval = true;
00486 
00487    TString interval_name = TString("ShortestBayesianInterval_a") + TString(this->GetName());
00488    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00489    interval->SetTitle("Shortest SimpleInterval from BATCalculator");
00490 
00491    //if(assert)
00492    return interval;
00493 }
00494 
00495 // ---------------------------------------------------------
00496 MCMCInterval* BATCalculator::GetInterval() const{
00497 
00498    // run some sanity checks
00499    if (!fPdf ) {
00500      std::cerr << "BATCalculator::GetInterval - missing pdf model" << std::endl;
00501      return 0;
00502    }
00503 
00504    if (!fPrior) {
00505       std::cerr << "BATCalculator::GetInterval - missing prior pdf" << std::endl;
00506    }
00507 
00508    if (fPOI.getSize() == 0) {
00509      std::cerr << "BATCalculator::GetInterval - missing parameter of interest" << std::endl;
00510      return 0;
00511    }
00512 
00513    if (!fPosteriorPdf){
00514       //initialize RooInterface object
00515       _myRooInterface->Initialize(*fData,*fPdf,*fPrior,fparams,fPOI);
00516       _myRooInterface->MCMCSetNIterationsRun(_nMCMC);
00517       _myRooInterface->MarginalizeAll();
00518       _myRooInterface->FindMode();
00519    }
00520 
00521    MarkovChain * roostats_chain = GetBCRooInterface()->GetRooStatsMarkovChain();
00522    MCMCInterval * mcmcInterval = new MCMCInterval("roostatsmcmcinterval", *(GetBCRooInterface()->GetArgSetForMarkovChain()) , *roostats_chain);
00523    mcmcInterval->SetUseKeys(false);
00524    mcmcInterval->SetConfidenceLevel(1.-fSize);
00525    return mcmcInterval;
00526 }
00527 
00528 // ---------------------------------------------------------
00529 void BATCalculator::SetNumBins(const char * parname, int nbins)
00530 {
00531    _myRooInterface->SetNumBins(parname, nbins);
00532 }
00533 
00534 void BATCalculator::SetNumBins(int nbins)
00535 {
00536    _myRooInterface->SetNumBins(nbins);
00537 }
00538 
00539 void BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction ){
00540    if( (leftSideFraction >= 0.) && (leftSideFraction <= 1.) ){
00541       fLeftSideFraction = leftSideFraction;
00542    }
00543    else{
00544       std::cout << "BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction ) - value needs to be in the interval [0.,1.] to be meaningful, your value: " << leftSideFraction <<  " ,left side tail fraction has not been changed!" << std::endl;
00545    }
00546 
00547 } 
00548 
00549 
00550 // ---------------------------------------------------------
00551 Double_t BATCalculator::GetOneSidedUperLim()
00552 {
00553 //   double safeVal = fSize;
00554 //   fSize = safeVal/2.;
00555    cout << "calculating " << (1.-fSize/2) << "upper limit" << endl;
00556    return GetInterval1D()->UpperLimit();
00557 }
00558 
00559 /*
00560 // ---------------------------------------------------------
00561 int BATCalculator::GetNbins(const char * parname)
00562 {
00563    ;
00564 }
00565 */
00566 
00567 // ---------------------------------------------------------
00568 bool sortbyposterior(pair< Int_t,Double_t > pair1, pair< Int_t,Double_t > pair2)
00569 {
00570    return (pair1.second > pair2.second);
00571 }
00572 
00573 
00574 
00575 } // end namespace RooStats

Generated by  doxygen 1.7.1