From 71e26dab70b78cf22b064baa89cbf0d6a1958bc1 Mon Sep 17 00:00:00 2001 From: Rami Date: Wed, 4 Mar 2015 13:47:30 +0100 Subject: [PATCH 1/2] added few fncs to VarCut --- SelectionOptimization/VarCut.cc | 166 ++++++++++++++++++++++++++++++++ SelectionOptimization/VarCut.hh | 15 ++- 2 files changed, 180 insertions(+), 1 deletion(-) diff --git a/SelectionOptimization/VarCut.cc b/SelectionOptimization/VarCut.cc index 636f9cb..bd5d8ef 100644 --- a/SelectionOptimization/VarCut.cc +++ b/SelectionOptimization/VarCut.cc @@ -1,4 +1,12 @@ #include "VarCut.hh" +#include +#include +#include +#include +#include "assert.h" + +using std::vector; +using std::string; const int UNDEFCUT = -999; @@ -33,6 +41,108 @@ TCut *VarCut::getCut(){ return cut; } +TCut *VarCut::getCutsForMVAcomparison(){ + + TCut *cut = 0; + + // Die if something appears uninitialized + for(int i=0; inameTmva.Data(), + _cuts[i]); + } + + return cut; +} + + +TCut *VarCut::getCutDB(){ + + TCut *cut = 0; + + // Die if something appears uninitialized + for(int i=0; inameTmva.Data(), + _cuts[i]); + } + + return cut; +} + + +TCut *VarCut::getCutNminusOne(int varIndex){ + + TCut *cut = 0; + + // Die if something appears uninitialized + for(int i=0; inameTmva.Data(), + _cuts[i]); + } + + return cut; +} + + +TCut *VarCut::getCutOneVar(int varIndex){ + + TCut *cut = 0; + + // Die if something appears uninitialized + for(int i=0; inameTmva.Data(), + _cuts[i]); + } + + return cut; +} + + void VarCut::setCutValue(TString varName, float val){ int index = getVariableIndex(varName); @@ -73,6 +183,24 @@ float VarCut::getCutValue(TString variable){ return cutVal; } + +float VarCut::getCutValueTmvaName(TString variable){ + + float cutVal = UNDEFCUT; + int index = getVariableIndexTmvaName(variable); + + if( index != -1 ){ + cutVal = _cuts[index]; + }else{ + printf("VarCut::getCutValue: requested variable is not known!!!\n"); + } + + return cutVal; +} + + + + int VarCut::getVariableIndex(TString variable){ int index = -1; @@ -124,4 +252,42 @@ void VarCut::print(){ } +vector VarCut::printTable(){ + + string::size_type maxlen = strlen(Vars::variables[8]->nameTmva.Data()) + 6 ; // because it is "| " on the left plus " < |" on the right + vector ret; + + // std::cout<nameTmva.Data() ); + currStr = currStr_buff; + currStr += string(maxlen- strlen(Vars::variables[iVar]->nameTmva.Data()) -6,' '); //exclude 2 spaces + 4 for " < |" + currStr += string(" < |"); + + sprintf(cutStr_buff, " %1.6f" , _cuts[iVar] ); + cutStr= cutStr_buff; + cutStr += string(maxlen - 29,' '); //exclude 2 spaces + 8 chars number + cutStr += string("|"); + currStr += cutStr; + //currStr length is 11 " '8digits' |" + + ret.push_back(currStr); + } //end of for-loop + + return ret; +} //end of printTable fcn diff --git a/SelectionOptimization/VarCut.hh b/SelectionOptimization/VarCut.hh index 1cdc6c3..b661fac 100644 --- a/SelectionOptimization/VarCut.hh +++ b/SelectionOptimization/VarCut.hh @@ -3,8 +3,15 @@ #include "TCut.h" #include - +#include +#include +#include +#include #include "Variables.hh" +#include "VariablesDB.hh" + +using std::vector; +using std::string; class VarCut :public TObject { @@ -23,9 +30,14 @@ public: // Look up cut value for given variable (regular name) float getCutValue(TString var); + float getCutValueTmvaName(TString var); // Get the full TCut object with cuts on all variables TCut* getCut(); + TCut *getCutsForMVAcomparison(); + TCut* getCutDB(); + TCut* getCutNminusOne(int varIndex); + TCut* getCutOneVar(int varIndex); // Get index of the variable in the internal array from its name, // using the regular name and the name known to TMVA (may include abs()) @@ -37,6 +49,7 @@ public: // Input/output void print(); // print to stdout + vector printTable(); // print to stdout private: // The actual list of variables for which cuts are stored here From 597d66819b9ffb585d1c1387f26ee982a50c51f2 Mon Sep 17 00:00:00 2001 From: RemKamal Date: Wed, 4 Mar 2015 14:18:32 +0100 Subject: [PATCH 2/2] scripts to print cuts of Twiki format; and combined ROC --- SelectionOptimization/drawROCandWPv3.C | 372 +++++++++++++++++++++++++ SelectionOptimization/printWIKItable.C | 189 +++++++++++++ 2 files changed, 561 insertions(+) create mode 100644 SelectionOptimization/drawROCandWPv3.C create mode 100644 SelectionOptimization/printWIKItable.C diff --git a/SelectionOptimization/drawROCandWPv3.C b/SelectionOptimization/drawROCandWPv3.C new file mode 100644 index 0000000..7ffba5c --- /dev/null +++ b/SelectionOptimization/drawROCandWPv3.C @@ -0,0 +1,372 @@ +#include "TLatex.h" +#include "TTree.h" +#include "TLegend.h" +#include "TMarker.h" +#include "TCanvas.h" +#include "TString.h" +#include "TH1F.h" +#include "TFile.h" +#include +#include "OptimizationConstants.hh" +#include "VarCut.hh" +#include "TStyle.h" + +using std::cout; +using std::endl; + +// +// this script draws combined ROC and 4 Wps +// + +const bool verbose = true; +// Draw barrel or endcap +const bool drawBarrel = false; +const int nROCs = 4; + +// Files with signal and background trees (ideally the ntuples +// that were used for TMVA optimization +const TString fnameSignal = "DYJetsToLL_phys14_v1.root"; +const TString signalTreeName = "ntupler/ElectronTree"; +const TString fnameBackground = "TTJets_phys14_v1.root"; +const TString backgroundTreeName = "ntupler/ElectronTree"; + +// Name TMVA output file that contains the pre-computed ROC, etc +const TString tmvaFileNameBarrel[nROCs] = { + "../../barrel/SelectionOptimization/trainingData/training_results_barrel_pass1_20140203_224900/TMVA_training_results_barrel_pass1_20140203_224900.root", + "../../barrel/SelectionOptimization/trainingData/training_results_barrel_pass2_20140203_224900/TMVA_training_results_barrel_pass2_20140203_224900.root", + "../../barrel/SelectionOptimization/trainingData/training_results_barrel_pass3_20140203_224900/TMVA_training_results_barrel_pass3_20140203_224900.root", + "../../barrel/SelectionOptimization/trainingData/training_results_barrel_pass4_20140203_224900/TMVA_training_results_barrel_pass4_20140203_224900.root" +}; + + + +const TString tmvaFileNameEndcap[nROCs] ={"trainingData/training_results_endcap_pass1_20140203_225900/TMVA_training_results_endcap_pass1_20140203_225900.root", + "trainingData/training_results_endcap_pass2_20140203_225900/TMVA_training_results_endcap_pass2_20140203_225900.root", + "trainingData/training_results_endcap_pass3_20140203_225900/TMVA_training_results_endcap_pass3_20140203_225900.root", + "trainingData/training_results_endcap_pass4_20140203_225900/TMVA_training_results_endcap_pass4_20140203_225900.root" +}; + + +// +// Names of ROOT files that contain working points +// +const int nWorkingPointSets = 4; + +// Set 1 +const int markerColorSet1 = kRed; +const int markerStyleSet1 = 20; +const TString legendSet1 = "WP_Veto"; +const int nWP = 1; +const TString cutFileNamesBarrelSet1[nWP] = { + "../../barrel/SelectionOptimization/cut_repository/cuts_barrel_20140203_224900_WP_Veto.root" + +}; +const TString cutFileNamesEndcapSet1[nWP] = { + "cut_repository/cuts_endcap_20140203_225900_WP_Veto.root" + +}; + + +// Set 2 +const int markerColorSet2 = kOrange; +const int markerStyleSet2 = 20; +const TString legendSet2 = "WP_Loose"; +const TString cutFileNamesBarrelSet2[nWP] = { +"../../barrel/SelectionOptimization/cut_repository/cuts_barrel_20140203_224900_WP_Loose.root" + +}; +const TString cutFileNamesEndcapSet2[nWP] = { + "cut_repository/cuts_endcap_20140203_225900_WP_Loose.root" +}; + + +// Set 3 +const int markerColorSet3 = kBlue; +const int markerStyleSet3 = 20; +const TString legendSet3 = "WP_Medium"; +const TString cutFileNamesBarrelSet3[nWP] = { +"../../barrel/SelectionOptimization/cut_repository/cuts_barrel_20140203_224900_WP_Medium.root" + +}; +const TString cutFileNamesEndcapSet3[nWP] = { + "cut_repository/cuts_endcap_20140203_225900_WP_Medium.root" +}; + + +// Set 4 +const int markerColorSet4 = kGreen; +const int markerStyleSet4 = 20; +const TString legendSet4 = "WP_Tight"; +const TString cutFileNamesBarrelSet4[nWP] = { + "../../barrel/SelectionOptimization/cut_repository/cuts_barrel_20140203_224900_WP_Tight.root" +}; +const TString cutFileNamesEndcapSet4[nWP] = { + "cut_repository/cuts_endcap_20140203_225900_WP_Tight.root" +}; + + + +// Forward declarations +TTree *getTreeFromFile(TString fname, TString tname); +void overlayWorkingPoints(TCanvas *c1, + TTree *signalTree, TTree *backgroundTree, + const TString *cutFileNames, + int markerColor, int markerStyle, + TLegend *leg, const TString legendText); +void findEfficiencies(TTree *signalTree, TTree *backgroundTree, + float &effSignal, float &effBackground, + VarCut *cutObject); + +void debug( std::string message){ + if(verbose) + std::cout<< message <cd(); + + TString tmvaFileName1 = tmvaFileNameBarrel[0]; + TString tmvaFileName2 = tmvaFileNameBarrel[1]; + TString tmvaFileName3 = tmvaFileNameBarrel[2]; + TString tmvaFileName4 = tmvaFileNameBarrel[3]; + + const TString *cutFileNamesSet1 = cutFileNamesBarrelSet1; + const TString *cutFileNamesSet2 = cutFileNamesBarrelSet2; + const TString *cutFileNamesSet3 = cutFileNamesBarrelSet3; + const TString *cutFileNamesSet4 = cutFileNamesBarrelSet4; + if( !drawBarrel ){ + tmvaFileName1 = tmvaFileNameEndcap[0]; + tmvaFileName2 = tmvaFileNameEndcap[1]; + tmvaFileName3 = tmvaFileNameEndcap[2]; + tmvaFileName4 = tmvaFileNameEndcap[3]; + + cutFileNamesSet1 = cutFileNamesEndcapSet1; + cutFileNamesSet2 = cutFileNamesEndcapSet2; + cutFileNamesSet3 = cutFileNamesEndcapSet3; + cutFileNamesSet4 = cutFileNamesEndcapSet4; + } + +debug("I am here2"); + + // Open the file with TMVA output + TFile *tmvaFile1 = new TFile(tmvaFileName1); + TFile *tmvaFile2 = new TFile(tmvaFileName2); + TFile *tmvaFile3 = new TFile(tmvaFileName3); + TFile *tmvaFile4 = new TFile(tmvaFileName4); + if( !(tmvaFile1 || tmvaFile2 ||tmvaFile3 || tmvaFile4)) + assert(0); + + // + // Draw the ROC curve + // + TH1F *hROC1 = (TH1F*)tmvaFile1->Get("Method_Cuts/Cuts/MVA_Cuts_rejBvsS"); + TH1F *hROC2 = (TH1F*)tmvaFile2->Get("Method_Cuts/Cuts/MVA_Cuts_rejBvsS"); + TH1F *hROC3 = (TH1F*)tmvaFile3->Get("Method_Cuts/Cuts/MVA_Cuts_rejBvsS"); + TH1F *hROC4 = (TH1F*)tmvaFile4->Get("Method_Cuts/Cuts/MVA_Cuts_rejBvsS"); + if( !(hROC1 || hROC2 || hROC3|| hROC3 )) + assert(0); + +debug("I am here3"); + + + TH1F * hROC = new TH1F("hROC", "", 100, 0, 100); + for(int iBin = 1; iBin <=100; ++iBin){ + + if(1<=iBin && iBin<71){ + hROC->SetBinContent(iBin, 100*hROC4->GetBinContent(iBin)) ; + debug("I am here3A"); + } + else if(71 <= iBin && iBin<77){ //early cut-off at 77% + hROC->SetBinContent(iBin, 100*hROC3->GetBinContent(iBin)) ; + debug("I am here3B"); + } + else if(77 <=iBin && iBin<90){ + hROC->SetBinContent(iBin, 100*hROC2->GetBinContent(iBin)) ; + debug("I am here3C"); + } + else if(90<=iBin && iBin <=100){ + hROC->SetBinContent(iBin, 100*hROC1->GetBinContent(iBin)) ; + debug("I am here3D"); + } + else + printf("\nThis should never happen!"); + } + + //Set histogram attributes and draw the ROC curve + hROC->SetDirectory(0); + hROC->SetStats(0); + hROC->SetLineWidth(2); + hROC->SetTitle(""); + hROC->GetXaxis()->SetTitle("signal efficiency, %"); + hROC->GetYaxis()->SetTitle("background rejection, %"); + hROC->GetYaxis()->SetTitleOffset(1.4); + if( drawBarrel ){ + hROC->GetXaxis()->SetRangeUser(60, 100); + hROC->GetYaxis()->SetRangeUser(95, 100); + }else{ + hROC->GetXaxis()->SetRangeUser(60, 100); + hROC->GetYaxis()->SetRangeUser(80, 100); + } + + + + // gStyle->SetStatBorderSize(1); + // c1->SetFillColor(0); + gStyle->SetFillColor(0) ; + hROC->Draw("L"); + +debug("I am here4"); + TString commentText = "barrel electrons"; + if( !drawBarrel ) + commentText = "endcap electrons"; + TLatex *comment = new TLatex(0.2, 0.2, commentText); + comment->SetNDC(kTRUE); + comment->Draw(); + + c1->Update(); + + // + // Overlay the cuts + // +debug("I am here5"); + // First find the TestTree for measuring efficiency and rejection + printf("\n Take true electrons from %s tree %s\n", + fnameSignal.Data(), signalTreeName.Data()); + TTree *signalTree = getTreeFromFile( fnameSignal, signalTreeName); + // Input background tree + printf("\n Take background electrons from %s tree %s\n", + fnameBackground.Data(), backgroundTreeName.Data()); + TTree *backgroundTree = getTreeFromFile( fnameBackground, backgroundTreeName); + debug("I am here6"); + // Next, draw all working point sets + if( nWorkingPointSets >=1 ){ + + TLegend *leg = new TLegend(0.15, 0.45, 0.5, 0.7); + leg->SetFillStyle(0); + leg->SetBorderSize(0); + overlayWorkingPoints(c1, signalTree, backgroundTree, cutFileNamesSet1, + markerColorSet1, markerStyleSet1, leg, legendSet1); + +debug("I am here7"); + overlayWorkingPoints(c1, signalTree, backgroundTree, cutFileNamesSet2, + markerColorSet2, markerStyleSet2, leg, legendSet2); + + overlayWorkingPoints(c1, signalTree, backgroundTree, cutFileNamesSet3, + markerColorSet3, markerStyleSet3, leg, legendSet3); + overlayWorkingPoints(c1, signalTree, backgroundTree, cutFileNamesSet4, + markerColorSet4, markerStyleSet4, leg, legendSet4); + leg->Draw("same"); + + } + + debug("I am here8"); + // Save the figure into a file + TString outname = "figures/plot_ROCandWP_barrel_v3_feb2015.png"; + if( !drawBarrel ) + outname = "figures/plot_ROCandWP_endcap_v3_feb2015.png"; + c1->Print(outname); +debug("I am here9"); + return; +}; + +// Get a given tree from a given file name. +TTree *getTreeFromFile(TString fname, TString tname){ + + TFile *file = new TFile( fname ); + TTree *tree = (TTree*) file->Get(tname); + + return tree; +} + +// Draw on a given canvas the full set of working points +void overlayWorkingPoints(TCanvas *c1, + TTree *signalTree, TTree *backgroundTree, + const TString *cutFileNames, + int markerColor, int markerStyle, + TLegend *leg, const TString legendText){ + + + // Now loop over working points + for(int iwp = 0; iwpGet("cuts"); + if( !cutObject ) + assert(0); + + // Compute the efficiencies + float effSignal, effBackground; + findEfficiencies(signalTree, backgroundTree, effSignal, effBackground, + cutObject); + printf("Computed eff for cut from %s, effS= %.6f effB= %.6f\n", + cutFileNames[iwp].Data(), effSignal, effBackground); + + // Make a marker and draw it. + TMarker *marker = new TMarker(100*effSignal, 100-100*effBackground, 20); + marker->SetMarkerSize(2); + marker->SetMarkerColor(markerColor); + marker->SetMarkerStyle(markerStyle); + marker->Draw("same"); + + // Add marker to the legend only once. Do not draw the legend here, + // it is drawn in the main function later + if( iwp == 0 ){ + if( !leg ) + assert(0); + leg->AddEntry(marker, legendText, "p"); + } + + c1->Update(); + + cutFile->Close(); + } + + +} + +// Compute signal and background efficiencies for given cuts +void findEfficiencies(TTree *signalTree, TTree *backgroundTree, + float &effSignal, float &effBackground, VarCut *cutObject){ + + TCut etaCut = ""; + if( drawBarrel ){ + etaCut = Opt::etaCutBarrel; + }else{ + etaCut = Opt::etaCutEndcap; + } + TCut kinematicCuts = Opt::ptCut && etaCut; + + TCut preselectionCuts = kinematicCuts && Opt::otherPreselectionCuts; + + TCut signalCuts = preselectionCuts && Opt::trueEleCut; + TCut backgroundCuts = preselectionCuts && Opt::fakeEleCut; + + TCut selectionCuts = *(cutObject->getCut()); + + // printf("\nSelecton cuts:\n"); + // selectionCuts.Print(); + // printf("\nSignal cuts:\n"); + // signalCuts.Print(); + // printf("\nBackground cuts:\n"); + // backgroundCuts.Print(); + + effSignal = (1.0*signalTree->GetEntries(selectionCuts && signalCuts) ) + / signalTree->GetEntries(signalCuts); + + effBackground = (1.0*backgroundTree->GetEntries(selectionCuts + && backgroundCuts) ) + / backgroundTree->GetEntries(backgroundCuts); + + return; +} diff --git a/SelectionOptimization/printWIKItable.C b/SelectionOptimization/printWIKItable.C new file mode 100644 index 0000000..a0fab6b --- /dev/null +++ b/SelectionOptimization/printWIKItable.C @@ -0,0 +1,189 @@ +#include "Variables.hh" +#include "OptimizationConstants.hh" +#include "VarCut.hh" +#include +#include "assert.h" +#include +#include + +const TString barrelTimeName ="20140203_224900_"; +const TString endcapTimeName ="20140203_225900_"; + + +int nVarsInTable = Vars::nVariables +1; // +1 means add passConversionVeto +const int nWP = 4; + +TString cutsFiles[nWP]; +TFile * cutFiles[nWP]; +VarCut * cutObjects[nWP] ; + + +const bool printBarrelTable = false; + +vector doHorizontConcat(vector & vec1, vector & vec2, vector & vec3, vector & vec4 ); + + +void printWIKItable(){ + + + for (int iwp =0; iwp < nWP; ++iwp){ + + cutsFiles[iwp] = + Opt::cutRepositoryDir + TString("/") + + (printBarrelTable ? TString("cuts_barrel_") : TString("cuts_endcap_") ) + + (printBarrelTable ? barrelTimeName : endcapTimeName) + + Opt::wpNames[iwp] + + TString(".root"); + + cutFiles[iwp] = new TFile(cutsFiles[iwp]); + + /* #1 */ + if (cutFiles[iwp]->IsZombie()) { + std::cout << "Error opening file" << std::endl; + exit(-1); + } + + //same protection alternative: + /* #2 */ + if( !cutFiles[iwp] ) + assert(0); + + cutObjects[iwp] = (VarCut*)cutFiles[iwp]->Get("cuts"); + if( !cutObjects[iwp] ) + assert(0); + + }//end for-loop + + vector table_V = cutObjects[0]->VarCut::printTable(); + vector table_L = cutObjects[1]->VarCut::printTable(); + vector table_M = cutObjects[2]->VarCut::printTable(); + vector table_T = cutObjects[3]->VarCut::printTable(); + + vector vecToPrint = doHorizontConcat(table_V, table_L, table_M, table_T); + + vector::size_type vecst =0; + std::cout << (printBarrelTable ? "\n PHYS14 selection, conditions: PU20 bx25, barrel cuts ( |eta supercluster| <= 1.479)" : "\n PHYS14 selection, conditions: PU20 bx25, endcap cuts (1.479 < |eta supercluster| < 2.5)") << std::endl; + + while (vecst != vecToPrint.size() ){ + std::cout< doHorizontConcat(vector & vec1, vector & vec2, vector & vec3, vector & vec4 ){ + + /* problem with static and ROOT */ + const string comparSignInSep = " < |"; + + vector wikiTable; + vector::size_type vecst =0; + + //just few checks + if (vec1.size() != vec2.size() || vec3.size() != vec4.size() || vec1.size() != vec3.size() ) + assert(0); + + while( vecst != vec1.size() ){ + + string str; + typedef string::const_iterator iter; + iter Vbeg = vec1[2].begin(); + iter Vend = vec1[2].end(); + + string::size_type maxlen = strlen(Vars::variables[8]->nameTmva.Data()) + 6 ; // because it is "| " on the left plus " < |" on the right + + if(vecst==0) { + str = string ("| Variables |") + + string(" Veto |") + + string(" Loose |") + + string(" Medium |") + + string(" Tight |") ; + + } + + //last line regarding passConversionVeto + else if(vecst == vec1.size() -1) { + str = "| passConversionVeto" + + string(maxlen - strlen("| passConversionVeto "),' ') + + string("|") + + +" YES" + + string(maxlen - 24,' ') //to make string equal 11 chars + + string("|") + + +" YES" + + string(maxlen - 24,' ') //to make string equal 11 chars + + string("|") + + +" YES" + + string(maxlen - 24,' ') //to make string equal 11 chars + + string("|") + + +" YES" + + string(maxlen - 24,' ') //to make string equal 11 chars + + string("|"); + + } + + + else { + + // iter Vcur = search(Vbeg, Vend, comparSignInSep.begin(), comparSignInSep.end()); + // iter beg = find_if(Vcur, Vend, digit); + // iter end = find_if(beg, Vend, space); + + string str1,str2, str3, str4; + str1 = vec1[vecst]; + str2 = vec2[vecst]; + str3 = vec3[vecst]; + str4 = vec4[vecst]; + + //sanity checks + if (str1.compare(str2) == 0) + std::cout << str1 << " is " << str2 << '\n'; + if (str3.compare(str4) == 0) + std::cout << str3 << " is " << str4 << '\n'; + if (str1.compare(str3) == 0) + std::cout << str1 << " is " << str3 << '\n'; + if (str2.compare(str4) == 0) + std::cout << str2 << " is " << str4 << '\n'; + + string::size_type start = 31; + string::size_type finish = 40; + + // start = beg - str1.begin(); + // finish = end - str1.begin(); + // str += str1; //the whole string from the 1st vector + str = str.append (str1.begin(), str1.end() ) ; + // std::cout<<"\nlengths is "<