• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BATCalculator.cxx

Go to the documentation of this file.
00001 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke, Stefan A. Schmitz
00002 
00003 #include <TAxis.h>
00004 #include <TFile.h>
00005 #include <TCanvas.h>
00006 #include <TVector.h>
00007 
00008 #include <RooAbsFunc.h>
00009 //#include <RooAbsReal.h>
00010 #include <RooRealVar.h>
00011 //#include <RooArgSet.h>
00012 #include <RooBrentRootFinder.h>
00013 #include <RooFormulaVar.h>
00014 #include <RooGenericPdf.h>
00015 //#include <RooPlot.h>
00016 #include <RooProdPdf.h>
00017 #include <RooDataHist.h>
00018 //#include <RooAbsPdf.h>
00019 #include <RooHistPdf.h>
00020 
00021 #include <RooStats/MarkovChain.h>
00022 
00023 #include "../../BAT/BCLog.h"
00024 #include "../../BAT/BCAux.h"
00025 #include "../../BAT/BCH1D.h"
00026 
00027 #include "BCRooInterface.h"
00028 #include "BATCalculator.h"
00029 
00030 #include <algorithm>
00031 #include <cassert>
00032 
00033 
00034 
00035 ClassImp(RooStats::BATCalculator)
00036 
00037 namespace RooStats {
00038 
00039 // ---------------------------------------------------------
00040 BATCalculator::BATCalculator()
00041    : fData(0)
00042    , fPdf(0)
00043    , fPrior(0)
00044    , fProductPdf(0)
00045    , fLogLike(0)
00046    , fLikelihood(0)
00047    , fIntegratedLikelihood(0)
00048    , fPosteriorPdf(0)
00049    , fLower(0)
00050    , fUpper(0)
00051    , fBrfPrecision(0.00005)
00052    , fValidInterval(false)
00053    , fSize(0.05)
00054    , fLeftSideFraction(0.5)
00055 {
00056    // default constructor
00057    _myRooInterface = new BCRooInterface();
00058 }
00059 
00060 // ---------------------------------------------------------
00061 BATCalculator::BATCalculator( /* const char * name,  const char * title, */
00062                               RooAbsData & data,
00063                               RooAbsPdf  & pdf,
00064                               RooArgSet  & POI,
00065                               RooAbsPdf  & prior,
00066                               RooArgSet  * params,
00067                               bool fillChain)
00068    : fData(&data)
00069    , fPdf(&pdf)
00070    , fPOI(POI)
00071    , fPrior(&prior)
00072    , fparams(params)
00073    , fProductPdf(0)
00074    , fLogLike(0)
00075    , fLikelihood(0)
00076    , fIntegratedLikelihood(0)
00077    , fPosteriorPdf(0)
00078    , fLower(0)
00079    , fUpper(0)
00080    , fBrfPrecision(0.00005)
00081    , fValidInterval(false)
00082    , _nMCMC(1000000)
00083    , fSize(0.05)
00084    , fLeftSideFraction(0.5)
00085    //, TNamed( TString(name), TString(title) )
00086 {
00087    // constructor
00088    //if (nuisanceParameters)
00089    //   fNuisanceParameters.add(*nuisanceParameters);
00090    _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT",fillChain);
00091 }
00092 
00093 // ---------------------------------------------------------
00094 BATCalculator::BATCalculator( RooAbsData & data, ModelConfig & model, bool fillChain)
00095    : fData(&data)
00096    , fPdf(model.GetPdf())
00097    , fPOI(*model.GetParametersOfInterest())
00098    , fPrior(model.GetPriorPdf())
00099    , fparams(model.GetNuisanceParameters())
00100    , fProductPdf(0)
00101    , fLogLike(0)
00102    , fLikelihood(0)
00103    , fIntegratedLikelihood(0)
00104    , fPosteriorPdf(0)
00105    , fLower(0)
00106    , fUpper(0)
00107    , fBrfPrecision(0.00005)
00108    , fValidInterval(false)
00109    , _nMCMC(1000000)
00110    , fSize(0.05)
00111    , fLeftSideFraction(0.5)
00112 {
00113    cout << "BATCalculator calling constructor ..." << endl;
00114    // constructor from Model Config
00115  //  SetModel(model);
00116    _myRooInterface = new BCRooInterface("BCRooInterfaceForBAT",fillChain);
00117    cout << "BATCalculator constructed" << endl;
00118 }
00119 
00120 // ---------------------------------------------------------
00121 BATCalculator::~BATCalculator()
00122 {
00123    cout << "BATCalculator calling destructor ..." << endl;
00124    // destructor
00125    ClearAll();
00126    delete _myRooInterface;
00127 }
00128 
00129 // ---------------------------------------------------------
00130 void BATCalculator::ClearAll() const
00131 {
00132    // clear cached pdf objects
00133    if (fProductPdf)
00134       delete fProductPdf;
00135    if (fLogLike)
00136       delete fLogLike;
00137    if (fLikelihood)
00138       delete fLikelihood;
00139    if (fIntegratedLikelihood)
00140       delete fIntegratedLikelihood;
00141    if (fPosteriorPdf)
00142       delete fPosteriorPdf;
00143    fPosteriorPdf = 0;
00144    fProductPdf = 0;
00145    fLogLike = 0;
00146    fLikelihood = 0;
00147    fIntegratedLikelihood = 0;
00148    fLower = 0;
00149    fUpper = 0;
00150    fValidInterval = false;
00151 }
00152 
00153 // ---------------------------------------------------------
00154 void BATCalculator::SetModel(const ModelConfig & model)
00155 {
00156    // set the model
00157    //fPdf = model.GetPdf();
00158    //fPriorPOI =  model.GetPriorPdf();
00159    // assignment operator = does not do a real copy the sets (must use add method)
00160    //fPOI.removeAll();
00161    //*fparams.removeAll();
00162    //if (model.GetParametersOfInterest())
00163    //   fPOI.add( *(model.GetParametersOfInterest()) );
00164    //if (model.GetNuisanceParameters())
00165    //   fNuisanceParameters.add( *(model.GetNuisanceParameters() ) );
00166 
00167    // invalidate the cached pointers
00168    //ClearAll();
00169 }
00170 
00171 // ---------------------------------------------------------
00172 RooArgSet * BATCalculator::GetMode(RooArgSet * /* parameters */) const
00173 {
00176   // Should cover multi-dimensional cases...
00177   // How do we do if there are points that are equi-probable?
00178 
00179   return 0;
00180 }
00181 
00182 // ---------------------------------------------------------
00183 //returns posterior with respect to first entry in the POI ArgSet
00184 RooAbsPdf * BATCalculator::GetPosteriorPdf1D() const
00185 {
00186    const char * POIname = fPOI.first()->GetName();
00187    return GetPosteriorPdf1D(POIname);
00188 }
00189 
00190 // ---------------------------------------------------------
00191 // returns posterior with respect to provided name in the POI ArgSet
00192 RooAbsPdf * BATCalculator::GetPosteriorPdf1D(const char * POIname) const
00193 {
00194 
00195    // run some sanity checks
00196    if (!fPdf ) {
00197      std::cerr << "BATCalculator::GetPosteriorPdf - missing pdf model" << std::endl;
00198      return 0;
00199    }
00200 
00201    if (!fPrior) {
00202       std::cerr << "BATCalculator::GetPosteriorPdf - missing prior pdf" << std::endl;
00203    }
00204 
00205    if (fPOI.getSize() == 0) {
00206      std::cerr << "BATCalculator::GetPosteriorPdf - missing parameter of interest" << std::endl;
00207      return 0;
00208    }
00209 
00210    if (fPOI.getSize() > 1) {
00211       std::cerr << "BATCalculator::GetPosteriorPdf - current implementation works only on 1D intervals" << std::endl;
00212       return 0;
00213    }
00214 
00215     //initialize RooInterface object
00216    _myRooInterface->Initialize(*fData,*fPdf,*fPrior,fparams,fPOI);
00217    _myRooInterface->MCMCSetNIterationsRun(_nMCMC);
00218    _myRooInterface->MarginalizeAll();
00219    _myRooInterface->FindMode();
00220    BCParameter * myPOI = _myRooInterface->GetParameter(POIname);
00221    BCH1D * myPosterior =_myRooInterface->GetMarginalized(myPOI);
00222    TH1D * posteriorTH1D = myPosterior->GetHistogram();
00223    _posteriorTH1D = static_cast<TH1D *>(posteriorTH1D->Clone("_posteriorTH1D"));
00224    RooDataHist * posteriorRooDataHist = new RooDataHist("posteriorhist","", fPOI,posteriorTH1D);
00225    fPosteriorPdf = new RooHistPdf("posteriorPdf","",fPOI,*posteriorRooDataHist);
00226 
00227    /*
00228    // plots for debugging
00229    TFile * debugfile = new TFile( "debug_posterior.root" ,"RECREATE");
00230    if ( debugfile->IsOpen() )
00231       cout << "File debug_posterior opened successfully" << endl;
00232    debugfile->cd();
00233    //posteriorTH1D->Write("posteriorTH1D");
00234    //posteriorRooDataHist->Write("posteriorRooDataHist");
00235    fPosteriorPdf->Write("fPosteriorPdf");
00236    TCanvas myCanvas("canvasforfposterior","canvas for fposterior");
00237    myCanvas.cd();
00238    //fPosteriorPdf->Draw();
00239    //myCanvas.Write();
00240    RooRealVar nSig("nSig","",0.0001,200.);
00241    RooPlot * pl = nSig.frame();
00242    fPosteriorPdf->plotOn(pl);
00243    pl->Draw();
00244    pl->Write();
00245    myCanvas.Write();
00246    debugfile->Close();
00247 
00248    // just for testing: create plots with BAT
00249    char * outputFile = "bat_plots_debugging.ps";
00250    _myRooInterface -> PrintAllMarginalized(outputFile);
00251    //*/
00252 
00253    return fPosteriorPdf; // is of type RooAbsPdf*
00254 }
00255 
00256 // ---------------------------------------------------------
00257 // return a RooPlot with the posterior PDF and the credibility region
00258 RooPlot * BATCalculator::GetPosteriorPlot1D() const
00259 {
00260 
00261    if (!fPosteriorPdf)
00262       GetPosteriorPdf1D();
00263    if (!fValidInterval)
00264       GetInterval1D();
00265 
00266    RooAbsRealLValue * poi = dynamic_cast<RooAbsRealLValue *>( fPOI.first() );
00267    assert(poi);
00268 
00269    RooPlot* plot = poi->frame();
00270 
00271    plot->SetTitle(TString("Posterior probability of parameter \"")+TString(poi->GetName())+TString("\""));
00272    fPosteriorPdf->plotOn(plot,RooFit::Range(fLower,fUpper,kFALSE),RooFit::VLines(),RooFit::DrawOption("F"),RooFit::MoveToBack(),RooFit::FillColor(kGray));
00273    fPosteriorPdf->plotOn(plot);
00274    plot->GetYaxis()->SetTitle("posterior probability");
00275 
00276    return plot;
00277 }
00278 
00279 // ---------------------------------------------------------
00280 // returns central interval for first POI in the POI ArgSet
00281 SimpleInterval * BATCalculator::GetInterval1D() const
00282 {
00283    const char * POIname = fPOI.first()->GetName();
00284    return GetInterval1D(POIname); //is of type RooAbsPdf *
00285 }
00286 
00287 // ---------------------------------------------------------
00288 // returns central interval for requested POI
00289 SimpleInterval * BATCalculator::GetInterval1D(const char * POIname) const
00290 {
00295 
00296    if (fValidInterval)
00297       std::cout << "BATCalculator::GetInterval1D:"
00298                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00299 
00300    RooRealVar * poi = dynamic_cast<RooRealVar *>( fPOI.find(POIname) );
00301    assert(poi);
00302 
00303    if (!fPosteriorPdf)
00304       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00305 
00306    RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00307    //RooAbsReal* cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanNoCdf());
00308    RooAbsFunc * cdf_bind = cdf->bindVars(fPOI,&fPOI);
00309    RooBrentRootFinder brf(*cdf_bind);
00310    brf.setTol(fBrfPrecision); // set the brf precision
00311 
00312    double tmpVal = poi->getVal();  // patch used because findRoot changes the value of poi
00313 
00314    double y = fSize*fLeftSideFraction; //adjust lower tail prob. according to fLeftSideFraction
00315 
00316    brf.findRoot(fLower,poi->getMin(),poi->getMax(),y);
00317 
00318    y = 1.-(fSize*(1.-fLeftSideFraction) ); //adjust upper tail prob. according to fLeftSideFraction
00319 
00320    bool ret = brf.findRoot(fUpper,poi->getMin(),poi->getMax(),y);
00321    if (!ret)
00322       std::cout << "BATCalculator::GetInterval1D: Warning:"
00323                 << "Error returned from Root finder, estimated interval is not fully correct"
00324                 << std::endl;
00325 
00326    poi->setVal(tmpVal); // patch: restore the original value of poi
00327 
00328    delete cdf_bind;
00329    delete cdf;
00330    fValidInterval = true;
00331    fConnectedInterval = true;
00332 
00333    TString interval_name = TString("CentralBayesianInterval_a") + TString(this->GetName());
00334    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00335    interval->SetTitle("SimpleInterval from BATCalculator");
00336 
00337    return interval;
00338 }
00339 
00340 // ---------------------------------------------------------
00341 SimpleInterval * BATCalculator::GetShortestInterval1D() const
00342 {
00343    const char * POIname = fPOI.first()->GetName();
00344    bool checkConnected = true;
00345    return GetShortestInterval1D(POIname, checkConnected);
00346 }
00347 
00348 // ---------------------------------------------------------
00349 // returns a SimpleInterval with lower and upper bounds on the
00350 // parameter of interest. Applies the shortest interval rule to
00351 // compute the credibility interval. The resulting interval is not necessarily connected.
00352 // Methods for constructing the shortest region with respect given CL are now available in
00353 // different places (here; MCMCCalculator, BAT)-> this might require cleaning at some point.
00354 // Result is approximate as CL is not reached exactly (due to finite bin number/bin size in posterior histogram)-> make CL/interval a bit smaller or bigger ?
00355 SimpleInterval * BATCalculator::GetShortestInterval1D(const char * POIname, bool & checkConnected) const
00356 {
00357 
00358    if (fValidInterval)
00359       std::cout << "BATCalculator::GetShortestInterval1D:"
00360                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00361 
00362    // get pointer to selected parameter of interest
00363    RooRealVar * poi = dynamic_cast<RooRealVar*>( fPOI.find(POIname) );
00364    assert(poi);
00365 
00366    // get pointer to poserior pdf
00367    if (!fPosteriorPdf)
00368       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00369    //RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00370 
00371    // range of poi
00372    Double_t minpoi = poi->getMin();
00373    Double_t maxpoi = poi->getMax();
00374 
00375    // bin number of histogram for posterior
00376    Int_t stepnumber = _posteriorTH1D->GetNbinsX();
00377    cout << "stepnumber is: " << stepnumber << endl;
00378 
00379    // width of one bin in histogram of posterior
00380    Double_t stepsize = (maxpoi-minpoi)/stepnumber;
00381    cout << "stepsize is: " << stepsize << endl;
00382 
00383    // pair: first entry: bin number , second entry: value of posterior
00384    vector< pair< Int_t,Double_t > > posteriorVector;
00385 
00386    // for normalization
00387    Double_t histoIntegral = 0;
00388    // give posteriorVector the right length
00389    posteriorVector.resize(stepnumber);
00390 
00391    // see in BayesianCalculator for details about this "feature"
00392    Double_t tmpVal = poi->getVal();
00393 
00394    // initialize elements of posteriorVector
00395    int i = 0;
00396    vector< pair< Int_t,Double_t > >::iterator vecit = posteriorVector.begin();
00397    vector< pair< Int_t,Double_t > >::iterator vecit_end = posteriorVector.end();
00398    for( ; vecit != vecit_end ; ++vecit) {
00399       poi->setVal(poi->getMin()+i*stepsize);
00400       posteriorVector[i] = make_pair(i, _posteriorTH1D->GetBinContent(i+1) ); // hope this is working, +1 necessary, because GetBinContent(0) returns the underflow bin
00401       histoIntegral+=_posteriorTH1D->GetBinContent(i); // better to get this directly from the histogram ?!
00402       //cout << "pair with binnumber: " << i << " and postriorprob: " << _posteriorTH1D->GetBinContent(i+1) << endl;
00403       i++;
00404    }
00405 
00406    cout << "histoIntegral is: " << histoIntegral << endl;
00407 
00408    // sort posteriorVector with respect to posterior pdf
00409    std::sort(posteriorVector.begin(), posteriorVector.end(), sortbyposterior);
00410 
00411    // keep track of integrated posterior in the interval
00412    Double_t integratedposterior = 0.;
00413 
00414    // keep track of lowerLim and upperLim
00415    Double_t lowerLim=posteriorVector.size();
00416    Double_t upperLim=0;
00417 
00418    // store the bins in the intervall
00419    vector<bool> inInterval;
00420    inInterval.resize(posteriorVector.size());
00421 
00422    // set all values in inInterval to false
00423    for (unsigned int k = 0; k < inInterval.size(); k++)
00424       inInterval[k] = false;
00425 
00426    unsigned int j = 0;
00427    // add bins to interval while CL not reached
00428    //cout << "integratedposterior: " << integratedposterior << "histoIntegral: " << histoIntegral << endl;
00429    //cout << " integratedposterior/histoIntegral : " << integratedposterior/histoIntegral << endl;
00430    //cout << " 1-fSize : " << 1-fSize << endl;
00431    //cout << " posteriorVector.size(): " << posteriorVector.size() << endl;
00432    while(((integratedposterior/histoIntegral) < (1-fSize)) && (j < posteriorVector.size())) {
00433       //cout << "j is: " << j << " , int prob: " << integratedposterior/histoIntegral << endl;
00434       integratedposterior+=posteriorVector[j].second;
00435 
00436       //cout << "bin number: " << posteriorVector[j].first << " with posterior prob.: " << posteriorVector[j].second << endl;
00437 
00438       // update vector with bins included in the interval
00439       inInterval[posteriorVector[j].first] = true;
00440 
00441       if(posteriorVector[j].first < lowerLim) {
00442          lowerLim = posteriorVector[j].first; // update lowerLim
00443          cout << "updating lower lim to: " << lowerLim << endl;
00444       }
00445       if(posteriorVector[j].first > upperLim) {
00446          upperLim = posteriorVector[j].first; // update upperLim
00447          cout << "updating upper lim to: " << upperLim << endl;
00448       }
00449 
00450       fLower = lowerLim * stepsize;
00451       fUpper = upperLim * stepsize;
00452       j++;
00453    }
00454 
00455    // initialize vector with interval borders
00456 
00457    bool runInside = false;
00458    for (unsigned int l = 0; l < inInterval.size(); l++) {
00459       if ( (runInside == false) && (inInterval[l] == true) ) {
00460          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00461          runInside = true;
00462       }
00463       if ( ( runInside == true) && (l < (inInterval.size()-1) ) && (inInterval[l+1] == false) ) {
00464          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00465          runInside = false;
00466       }
00467       if ( ( runInside == true) && (l == (inInterval.size()-1)) ) {
00468          _intervalBorders1D.push_back(static_cast<double>(l)* stepsize);
00469       }
00470    }
00471 
00472    // check if the intervall is connected
00473    if(checkConnected) {
00474       if (_intervalBorders1D.size() > 2) {
00475          fConnectedInterval = false;
00476       }
00477       else {
00478          fConnectedInterval = true;
00479       }
00480    }
00481 
00482    poi->setVal(tmpVal); // patch: restore the original value of poi
00483 
00484    fValidInterval = true;
00485 
00486    TString interval_name = TString("ShortestBayesianInterval_a") + TString(this->GetName());
00487    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00488    interval->SetTitle("Shortest SimpleInterval from BATCalculator");
00489 
00490    //if(assert)
00491    return interval;
00492 }
00493 
00494 // ---------------------------------------------------------
00495 MCMCInterval* BATCalculator::GetInterval() const{
00496 
00497    // run some sanity checks
00498    if (!fPdf ) {
00499      std::cerr << "BATCalculator::GetInterval - missing pdf model" << std::endl;
00500      return 0;
00501    }
00502 
00503    if (!fPrior) {
00504       std::cerr << "BATCalculator::GetInterval - missing prior pdf" << std::endl;
00505    }
00506 
00507    if (fPOI.getSize() == 0) {
00508      std::cerr << "BATCalculator::GetInterval - missing parameter of interest" << std::endl;
00509      return 0;
00510    }
00511 
00512    if (!fPosteriorPdf){
00513       //initialize RooInterface object
00514       _myRooInterface->Initialize(*fData,*fPdf,*fPrior,fparams,fPOI);
00515       _myRooInterface->MCMCSetNIterationsRun(_nMCMC);
00516       _myRooInterface->MarginalizeAll();
00517       _myRooInterface->FindMode();
00518    }
00519 
00520    MarkovChain * roostats_chain = GetBCRooInterface()->GetRooStatsMarkovChain();
00521    MCMCInterval * mcmcInterval = new MCMCInterval("roostatsmcmcinterval", *(GetBCRooInterface()->GetArgSetForMarkovChain()) , *roostats_chain);
00522    mcmcInterval->SetUseKeys(false);
00523    mcmcInterval->SetConfidenceLevel(1.-fSize);
00524    return mcmcInterval;
00525 }
00526 
00527 // ---------------------------------------------------------
00528 void BATCalculator::SetNumBins(const char * parname, int nbins)
00529 {
00530    _myRooInterface->SetNumBins(parname, nbins);
00531 }
00532 
00533 void BATCalculator::SetNumBins(int nbins)
00534 {
00535    _myRooInterface->SetNumBins(nbins);
00536 }
00537 
00538 void BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction ){
00539    if( (leftSideFraction >= 0.) && (leftSideFraction <= 1.) ){
00540       fLeftSideFraction = leftSideFraction;
00541    }
00542    else{
00543       std::cout << "BATCalculator::SetLeftSideTailFraction(Double_t leftSideFraction ) - value needs to be in the interval [0.,1.] to be meaningful, your value: " << leftSideFraction <<  " ,left side tail fraction has not been changed!" << std::endl;
00544    }
00545 
00546 }
00547 
00548 // ---------------------------------------------------------
00549 Double_t BATCalculator::GetOneSidedUperLim()
00550 {
00551 //   double safeVal = fSize;
00552 //   fSize = safeVal/2.;
00553    cout << "calculating " << (1.-fSize/2) << "upper limit" << endl;
00554    return GetInterval1D()->UpperLimit();
00555 }
00556 
00557 /*
00558 // ---------------------------------------------------------
00559 int BATCalculator::GetNbins(const char * parname)
00560 {
00561    ;
00562 }
00563 */
00564 
00565 // ---------------------------------------------------------
00566 bool sortbyposterior(pair< Int_t,Double_t > pair1, pair< Int_t,Double_t > pair2)
00567 {
00568    return (pair1.second > pair2.second);
00569 }
00570 
00571 
00572 
00573 } // end namespace RooStats

Generated by  doxygen 1.7.1