/// \file /// \ingroup tutorial_tmva /// \notebook -nodraw /// This macro provides a simple example on how to use the trained classifiers /// within an analysis module /// - Project : TMVA - a Root-integrated toolkit for multivariate data analysis /// - Package : TMVA /// - Exectuable: TMVAClassificationApplication /// /// \macro_output /// \macro_code /// \author Andreas Hoecker #include #include #include #include #include #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TSystem.h" #include "TROOT.h" #include "TStopwatch.h" #include "TMVA/Tools.h" #include "TMVA/Reader.h" #include "TMVA/MethodCuts.h" #include "TXMLEngine.h" using namespace TMVA; void TMVAClassificationApplication_socialfinal( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; // Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN Use["DNN_CPU"] = 0; // CUDA-accelerated DNN training. Use["DNN_GPU"] = 0; // Multi-core accelerated DNN. // Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // //------------------------------------------------------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassificationApplication_socialfinal" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector mlist = gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) { std::cout << it->first << " "; } std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // Create the Reader object TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" ); // Create a set of variables and declare them to the reader // - the variable names MUST corresponds in name and type to those given in the weight file(s) used // Float_t var1, var2; // Float_t var3, var4; // reader->AddVariable( "myvar1 := var1+var2", &var1 ); // reader->AddVariable( "myvar2 := var1-var2", &var2 ); // reader->AddVariable( "var3", &var3 ); // reader->AddVariable( "var4", &var4 ); Float_t Age, Workc, Educ, Marit, Occup, Rel, Race, Sex, Hrs, Pais; //variables utilizadas en el entrenamiento reader->AddVariable( "Age", &Age ); reader->AddVariable( "Workc", &Workc ); reader->AddVariable( "Educ", &Educ ); reader->AddVariable( "Marit", &Marit ); reader->AddVariable( "Occup", &Occup ); reader->AddVariable( "Rel", &Rel ); reader->AddVariable( "Race", &Race ); reader->AddVariable( "Sex", &Sex ); // reader->AddVariable( "Gain", &Gain ); // reader->AddVariable( "Loss", &Loss ); reader->AddVariable( "Hrs", &Hrs ); reader->AddVariable( "Pais", &Pais ); //------------------------------------------------------------------------------------------------------------ // Book the MVA methods TString dir = "/home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_socialfinal/weights/"; TString prefix = "TMVAClassification_socialfinal"; // Book method(s) for (std::map::iterator it = Use.begin(); it != Use.end(); it++) { if (it->second) { TString methodName = TString(it->first) + TString(" method"); TString weightfile = dir + prefix + TString("_") + TString(it->first) + TString(".weights.xml"); reader->BookMVA( methodName, weightfile ); } } // Book output histograms UInt_t nbin = 100; // TH1F *histLk(0); // TH1F *histLkD(0); // TH1F *histLkPCA(0); // TH1F *histLkKDE(0); // TH1F *histLkMIX(0); // TH1F *histPD(0); // TH1F *histPDD(0); // TH1F *histPDPCA(0); // TH1F *histPDEFoam(0); // TH1F *histPDEFoamErr(0); // TH1F *histPDEFoamSig(0); // TH1F *histKNN(0); // TH1F *histHm(0); // TH1F *histFi(0); // TH1F *histFiG(0); // TH1F *histFiB(0); // TH1F *histLD(0); TH1F *histNn(0); TH1F *histNnbfgs(0); TH1F *histNnbnn(0); TH1F *histNnC(0); TH1F *histNnT(0); TH1F *histBdt(0); TH1F *histBdtG(0); TH1F *histBdtB(0); TH1F *histBdtD(0); TH1F *histBdtF(0); // TH1F *histRf(0); // TH1F *histSVMG(0); // TH1F *histSVMP(0); // TH1F *histSVML(0); // TH1F *histFDAMT(0); // TH1F *histFDAGA(0); // TH1F *histCat(0); // TH1F *histPBdt(0); TH1F *histDnnGpu(0); TH1F *histDnnCpu(0); // if (Use["Likelihood"]) histLk = new TH1F( "MVA_Likelihood", "MVA_Likelihood", nbin, -1, 1 ); // if (Use["LikelihoodD"]) histLkD = new TH1F( "MVA_LikelihoodD", "MVA_LikelihoodD", nbin, -1, 0.9999 ); // if (Use["LikelihoodPCA"]) histLkPCA = new TH1F( "MVA_LikelihoodPCA", "MVA_LikelihoodPCA", nbin, -1, 1 ); // if (Use["LikelihoodKDE"]) histLkKDE = new TH1F( "MVA_LikelihoodKDE", "MVA_LikelihoodKDE", nbin, -0.00001, 0.99999 ); // if (Use["LikelihoodMIX"]) histLkMIX = new TH1F( "MVA_LikelihoodMIX", "MVA_LikelihoodMIX", nbin, 0, 1 ); // if (Use["PDERS"]) histPD = new TH1F( "MVA_PDERS", "MVA_PDERS", nbin, 0, 1 ); // if (Use["PDERSD"]) histPDD = new TH1F( "MVA_PDERSD", "MVA_PDERSD", nbin, 0, 1 ); // if (Use["PDERSPCA"]) histPDPCA = new TH1F( "MVA_PDERSPCA", "MVA_PDERSPCA", nbin, 0, 1 ); // if (Use["KNN"]) histKNN = new TH1F( "MVA_KNN", "MVA_KNN", nbin, 0, 1 ); // if (Use["HMatrix"]) histHm = new TH1F( "MVA_HMatrix", "MVA_HMatrix", nbin, -0.95, 1.55 ); // if (Use["Fisher"]) histFi = new TH1F( "MVA_Fisher", "MVA_Fisher", nbin, -4, 4 ); // if (Use["FisherG"]) histFiG = new TH1F( "MVA_FisherG", "MVA_FisherG", nbin, -1, 1 ); // if (Use["BoostedFisher"]) histFiB = new TH1F( "MVA_BoostedFisher", "MVA_BoostedFisher", nbin, -2, 2 ); // if (Use["LD"]) histLD = new TH1F( "MVA_LD", "MVA_LD", nbin, -2, 2 ); if (Use["MLP"]) histNn = new TH1F( "MVA_MLP", "MVA_MLP", nbin, -1.25, 1.5 ); if (Use["MLPBFGS"]) histNnbfgs = new TH1F( "MVA_MLPBFGS", "MVA_MLPBFGS", nbin, -1.25, 1.5 ); if (Use["MLPBNN"]) histNnbnn = new TH1F( "MVA_MLPBNN", "MVA_MLPBNN", nbin, -1.25, 1.5 ); if (Use["CFMlpANN"]) histNnC = new TH1F( "MVA_CFMlpANN", "MVA_CFMlpANN", nbin, 0, 1 ); if (Use["TMlpANN"]) histNnT = new TH1F( "MVA_TMlpANN", "MVA_TMlpANN", nbin, -1.3, 1.3 ); if (Use["DNN_GPU"]) histDnnGpu = new TH1F("MVA_DNN_GPU", "MVA_DNN_GPU", nbin, -0.1, 1.1); if (Use["DNN_CPU"]) histDnnCpu = new TH1F("MVA_DNN_CPU", "MVA_DNN_CPU", nbin, -0.1, 1.1); if (Use["BDT"]) histBdt = new TH1F( "MVA_BDT", "MVA_BDT", nbin, -0.8, 0.8 ); if (Use["BDTG"]) histBdtG = new TH1F( "MVA_BDTG", "MVA_BDTG", nbin, -1.0, 1.0 ); if (Use["BDTB"]) histBdtB = new TH1F( "MVA_BDTB", "MVA_BDTB", nbin, -1.0, 1.0 ); if (Use["BDTD"]) histBdtD = new TH1F( "MVA_BDTD", "MVA_BDTD", nbin, -0.8, 0.8 ); if (Use["BDTF"]) histBdtF = new TH1F( "MVA_BDTF", "MVA_BDTF", nbin, -1.0, 1.0 ); // if (Use["RuleFit"]) histRf = new TH1F( "MVA_RuleFit", "MVA_RuleFit", nbin, -2.0, 2.0 ); // if (Use["SVM_Gauss"]) histSVMG = new TH1F( "MVA_SVM_Gauss", "MVA_SVM_Gauss", nbin, 0.0, 1.0 ); // if (Use["SVM_Poly"]) histSVMP = new TH1F( "MVA_SVM_Poly", "MVA_SVM_Poly", nbin, 0.0, 1.0 ); // if (Use["SVM_Lin"]) histSVML = new TH1F( "MVA_SVM_Lin", "MVA_SVM_Lin", nbin, 0.0, 1.0 ); // if (Use["FDA_MT"]) histFDAMT = new TH1F( "MVA_FDA_MT", "MVA_FDA_MT", nbin, -2.0, 3.0 ); // if (Use["FDA_GA"]) histFDAGA = new TH1F( "MVA_FDA_GA", "MVA_FDA_GA", nbin, -2.0, 3.0 ); // if (Use["Category"]) histCat = new TH1F( "MVA_Category", "MVA_Category", nbin, -2., 2. ); // if (Use["Plugin"]) histPBdt = new TH1F( "MVA_PBDT", "MVA_BDT", nbin, -0.8, 0.8 ); // PDEFoam also returns per-event error, fill in histogram, and also fill significance // if (Use["PDEFoam"]) { // histPDEFoam = new TH1F( "MVA_PDEFoam", "MVA_PDEFoam", nbin, 0, 1 ); // histPDEFoamErr = new TH1F( "MVA_PDEFoamErr", "MVA_PDEFoam error", nbin, 0, 1 ); // histPDEFoamSig = new TH1F( "MVA_PDEFoamSig", "MVA_PDEFoam significance", nbin, 0, 10 ); // } // Book example histogram for probability (the other methods are done similarly) // TH1F *probHistFi(0), *rarityHistFi(0); // if (Use["Fisher"]) { // probHistFi = new TH1F( "MVA_Fisher_Proba", "MVA_Fisher_Proba", nbin, 0, 1 ); // rarityHistFi = new TH1F( "MVA_Fisher_Rarity", "MVA_Fisher_Rarity", nbin, 0, 1 ); // } // Prepare input tree (this must be replaced by your data source) // in this example, there is a toy tree with signal and one with background events // we'll later on use only the "signal" events for the test in this example. // TFile *input(0); TString fname = "./home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_social_training.root"; // if (!gSystem->AccessPathName( fname )) { // input = TFile::Open( fname ); // check if file in local directory exists // } // else { // TFile::SetCacheFileDir("."); // input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD"); // if not: download from ROOT server // } // if (!input) { // std::cout << "ERROR: could not open data file" << std::endl; // exit(1); // } //Real data input // TFile* datafile = new TFile("/home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_social_real.root"); //open the file // TTree* datatree = (TTree*)datafile->Get("TreeD"); std::cout << "--- TMVAClassificationApp : Using input file: " << input->GetName() << std::endl; // std::cout << "--- TMVAClassificationApp : Using real data file "<< std::endl; // Event loop // Prepare the event tree // - Here the variable names have to corresponds to your tree // - You can use the same variables as above which is slightly faster, // but of course you can use different ones and copy the values inside the event loop // std::cout << "--- Select signal sample" << std::endl; // TTree* theTree = (TTree*)input->Get("TreeS"); // Float_t userVar1, userVar2; // theTree->SetBranchAddress( "var1", &userVar1 ); // theTree->SetBranchAddress( "var2", &userVar2 ); // theTree->SetBranchAddress( "var3", &var3 ); // theTree->SetBranchAddress( "var4", &var4 ); // TFile* signalfile = new TFile("/home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_social_training.root"); //open the file TTree* signaltree = (TTree*)input->Get("TreeS"); signaltree->SetBranchAddress( "Age", &Age ); signaltree->SetBranchAddress( "Workc", &Workc ); signaltree->SetBranchAddress( "Educ", &Educ ); signaltree->SetBranchAddress( "Marit", &Marit ); signaltree->SetBranchAddress( "Occup", &Occup ); signaltree->SetBranchAddress( "Rel", &Rel ); signaltree->SetBranchAddress( "Race", &Race ); signaltree->SetBranchAddress( "Sex", &Sex ); // signaltree->SetBranchAddress( "Gain", &Gain ); // signaltree->SetBranchAddress( "Loss", &Loss ); signaltree->SetBranchAddress( "Hrs", &Hrs ); signaltree->SetBranchAddress( "Pais", &Pais ); // Efficiency calculator for cut method // Int_t nSelCutsGA = 0; // Double_t effS = 0.7; std::vector vecVar(4); // vector for EvaluateMVA tests std::cout << "--- Processing: " << signaltree->GetEntries() << " events" << std::endl; TStopwatch sw; sw.Start(); for (Long64_t ievt=0; ievtGetEntries();ievt++) { if (ievt%1000 == 0) std::cout << "--- ... Processing event: " << ievt << std::endl; signaltree->GetEntry(ievt); // var1 = userVar1 + userVar2; // var2 = userVar1 - userVar2; // Return the MVA outputs and fill into histograms // if (Use["CutsGA"]) { // // Cuts is a special case: give the desired signal efficienciy // Bool_t passed = reader->EvaluateMVA( "CutsGA method", effS ); // if (passed) nSelCutsGA++; // } // if (Use["Likelihood" ]) histLk ->Fill( reader->EvaluateMVA( "Likelihood method" ) ); // if (Use["LikelihoodD" ]) histLkD ->Fill( reader->EvaluateMVA( "LikelihoodD method" ) ); // if (Use["LikelihoodPCA"]) histLkPCA ->Fill( reader->EvaluateMVA( "LikelihoodPCA method" ) ); // if (Use["LikelihoodKDE"]) histLkKDE ->Fill( reader->EvaluateMVA( "LikelihoodKDE method" ) ); // if (Use["LikelihoodMIX"]) histLkMIX ->Fill( reader->EvaluateMVA( "LikelihoodMIX method" ) ); // if (Use["PDERS" ]) histPD ->Fill( reader->EvaluateMVA( "PDERS method" ) ); // if (Use["PDERSD" ]) histPDD ->Fill( reader->EvaluateMVA( "PDERSD method" ) ); // if (Use["PDERSPCA" ]) histPDPCA ->Fill( reader->EvaluateMVA( "PDERSPCA method" ) ); // if (Use["KNN" ]) histKNN ->Fill( reader->EvaluateMVA( "KNN method" ) ); // if (Use["HMatrix" ]) histHm ->Fill( reader->EvaluateMVA( "HMatrix method" ) ); // if (Use["Fisher" ]) histFi ->Fill( reader->EvaluateMVA( "Fisher method" ) ); // if (Use["FisherG" ]) histFiG ->Fill( reader->EvaluateMVA( "FisherG method" ) ); // if (Use["BoostedFisher"]) histFiB ->Fill( reader->EvaluateMVA( "BoostedFisher method" ) ); // if (Use["LD" ]) histLD ->Fill( reader->EvaluateMVA( "LD method" ) ); if (Use["MLP" ]) histNn ->Fill( reader->EvaluateMVA( "MLP method" ) ); if (Use["MLPBFGS" ]) histNnbfgs ->Fill( reader->EvaluateMVA( "MLPBFGS method" ) ); if (Use["MLPBNN" ]) histNnbnn ->Fill( reader->EvaluateMVA( "MLPBNN method" ) ); if (Use["CFMlpANN" ]) histNnC ->Fill( reader->EvaluateMVA( "CFMlpANN method" ) ); if (Use["TMlpANN" ]) histNnT ->Fill( reader->EvaluateMVA( "TMlpANN method" ) ); if (Use["DNN_GPU"]) histDnnGpu->Fill(reader->EvaluateMVA("DNN_GPU method")); if (Use["DNN_CPU"]) histDnnCpu->Fill(reader->EvaluateMVA("DNN_CPU method")); if (Use["BDT" ]) histBdt ->Fill( reader->EvaluateMVA( "BDT method" ) ); if (Use["BDTG" ]) histBdtG ->Fill( reader->EvaluateMVA( "BDTG method" ) ); if (Use["BDTB" ]) histBdtB ->Fill( reader->EvaluateMVA( "BDTB method" ) ); if (Use["BDTD" ]) histBdtD ->Fill( reader->EvaluateMVA( "BDTD method" ) ); if (Use["BDTF" ]) histBdtF ->Fill( reader->EvaluateMVA( "BDTF method" ) ); // if (Use["RuleFit" ]) histRf ->Fill( reader->EvaluateMVA( "RuleFit method" ) ); // if (Use["SVM_Gauss" ]) histSVMG ->Fill( reader->EvaluateMVA( "SVM_Gauss method" ) ); // if (Use["SVM_Poly" ]) histSVMP ->Fill( reader->EvaluateMVA( "SVM_Poly method" ) ); // if (Use["SVM_Lin" ]) histSVML ->Fill( reader->EvaluateMVA( "SVM_Lin method" ) ); // if (Use["FDA_MT" ]) histFDAMT ->Fill( reader->EvaluateMVA( "FDA_MT method" ) ); // if (Use["FDA_GA" ]) histFDAGA ->Fill( reader->EvaluateMVA( "FDA_GA method" ) ); // if (Use["Category" ]) histCat ->Fill( reader->EvaluateMVA( "Category method" ) ); // if (Use["Plugin" ]) histPBdt ->Fill( reader->EvaluateMVA( "P_BDT method" ) ); // Retrieve also per-event error // if (Use["PDEFoam"]) { // Double_t val = reader->EvaluateMVA( "PDEFoam method" ); // Double_t err = reader->GetMVAError(); // histPDEFoam ->Fill( val ); // histPDEFoamErr->Fill( err ); // if (err>1.e-50) histPDEFoamSig->Fill( val/err ); // } // Retrieve probability instead of MVA output // if (Use["Fisher"]) { // probHistFi ->Fill( reader->GetProba ( "Fisher method" ) ); // rarityHistFi->Fill( reader->GetRarity( "Fisher method" ) ); // } // } // Get elapsed time sw.Stop(); std::cout << "--- End of event loop: "; sw.Print(); // Get efficiency for cuts classifier // if (Use["CutsGA"]) std::cout << "--- Efficiency for CutsGA method: " << double(nSelCutsGA)/theTree->GetEntries() // << " (for a required signal efficiency of " << effS << ")" << std::endl; // if (Use["CutsGA"]) { // // test: retrieve cuts for particular signal efficiency // // CINT ignores dynamic_casts so we have to use a cuts-secific Reader function to acces the pointer // TMVA::MethodCuts* mcuts = reader->FindCutsMVA( "CutsGA method" ) ; // if (mcuts) { // std::vector cutsMin; // std::vector cutsMax; // mcuts->GetCuts( 0.7, cutsMin, cutsMax ); // std::cout << "--- -------------------------------------------------------------" << std::endl; // std::cout << "--- Retrieve cut values for signal efficiency of 0.7 from Reader" << std::endl; // for (UInt_t ivar=0; ivarGetInputVar(ivar) // << "\" <= " // << cutsMax[ivar] << std::endl; // } // std::cout << "--- -------------------------------------------------------------" << std::endl; // } // } // Write histograms TFile *target = new TFile( "TMVAClassificationApp_socialfinal.root","RECREATE" ); // if (Use["Likelihood" ]) histLk ->Write(); // if (Use["LikelihoodD" ]) histLkD ->Write(); // if (Use["LikelihoodPCA"]) histLkPCA ->Write(); // if (Use["LikelihoodKDE"]) histLkKDE ->Write(); // if (Use["LikelihoodMIX"]) histLkMIX ->Write(); // if (Use["PDERS" ]) histPD ->Write(); // if (Use["PDERSD" ]) histPDD ->Write(); // if (Use["PDERSPCA" ]) histPDPCA ->Write(); // if (Use["KNN" ]) histKNN ->Write(); // if (Use["HMatrix" ]) histHm ->Write(); // if (Use["Fisher" ]) histFi ->Write(); // if (Use["FisherG" ]) histFiG ->Write(); // if (Use["BoostedFisher"]) histFiB ->Write(); // if (Use["LD" ]) histLD ->Write(); if (Use["MLP" ]) histNn ->Write(); if (Use["MLPBFGS" ]) histNnbfgs ->Write(); if (Use["MLPBNN" ]) histNnbnn ->Write(); if (Use["CFMlpANN" ]) histNnC ->Write(); if (Use["TMlpANN" ]) histNnT ->Write(); if (Use["DNN_GPU"]) histDnnGpu->Write(); if (Use["DNN_CPU"]) histDnnCpu->Write(); if (Use["BDT" ]) histBdt ->Write(); if (Use["BDTG" ]) histBdtG ->Write(); if (Use["BDTB" ]) histBdtB ->Write(); if (Use["BDTD" ]) histBdtD ->Write(); if (Use["BDTF" ]) histBdtF ->Write(); // if (Use["RuleFit" ]) histRf ->Write(); // if (Use["SVM_Gauss" ]) histSVMG ->Write(); // if (Use["SVM_Poly" ]) histSVMP ->Write(); // if (Use["SVM_Lin" ]) histSVML ->Write(); // if (Use["FDA_MT" ]) histFDAMT ->Write(); // if (Use["FDA_GA" ]) histFDAGA ->Write(); // if (Use["Category" ]) histCat ->Write(); // if (Use["Plugin" ]) histPBdt ->Write(); // Write also error and significance histos // if (Use["PDEFoam"]) { histPDEFoam->Write(); histPDEFoamErr->Write(); histPDEFoamSig->Write(); } // Write also probability hists // if (Use["Fisher"]) { if (probHistFi != 0) probHistFi->Write(); if (rarityHistFi != 0) rarityHistFi->Write(); } // target->Close(); std::cout << "--- Created root file: \"TMVApp_Classification_socialfinal.root\" containing the MVA output histograms" << std::endl; delete reader; std::cout << "==> TMVAClassificationApplication_socialfinal is done!" << std::endl; } } int main( int argc, char** argv ) { TString methodList; for (int i=1; i