#include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/Tools.h" #include "TMVA/TMVAGui.h" int TMVAClassification_socialfinal( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // Methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; // Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 1; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 1; // ROOT's own ANN Use["DNN_GPU"] = 0; // CUDA-accelerated DNN training. Use["DNN_CPU"] = 0; // Multi-core accelerated DNN. // Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 1; // uses Bagging Use["BDTD"] = 1; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // ---------------------------------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification_socialfinal" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return 1; } Use[regMethod] = 1; } } //--------------------------------------------------------------------------------------------------------------------------------- // Here the preparation phase begins // Register the training and test trees //Monte Carlo data input TFile* signalfile = new TFile("/home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_social_training.root"); //open the file TTree* signaltree = (TTree*)signalfile->Get("TreeS"); // TChain *dataMC = new TChain("ntuple", ""); // dataMC->Add("/home/karen/Desktop/babymeson/Rootuple_BstoJpsiK_2017_MC_MiniAODSIM_1.root/rootuple/ntuple"); // dataMC->Add("/home/karen/Desktop/babymeson/Rootuple_BstoJpsiK_2017_MC_MiniAODSIM_1.root/rootuple/ntuple"); // dataMC->Add("/home/karen/Desktop/babymeson/Rootuple_BstoJpsiK_2017_MC_MiniAODSIM_1.root/rootuple/ntuple"); // TTree *signaltree = (TTree*)dataMC; //Real data input TFile* datafile = new TFile("/home/karen/Desktop/Prueba_clasificacion_social_02082018/Data_social_real.root"); //open the file TTree* datatree = (TTree*)datafile->Get("TreeD"); // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVAClassification_socialfinal.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); //------------------------------------------------------------------------------------------------------------------------------------ // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_socialfinal", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P,D:AnalysisType=Classification" ); TMVA::DataLoader *dataloader=new TMVA::DataLoader("Data_socialfinal"); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //dataloader->AddVariable( "myvar1 := var1+var2", 'F' ); //dataloader->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); //Float_t Age, Workc, Educ, Marit, Occup, Rel, Race, Sex, Gain, Loss, Hrs, Pais, weight, Inc; //state variables dataloader->AddVariable( "Age", "Age", "units", 'F' ); dataloader->AddVariable( "Workc", "Sector_trabajo", "units", 'F' ); dataloader->AddVariable( "Educ", "Educacion", "units", 'F' ); dataloader->AddVariable( "Marit", "Edo. civil", "units", 'F' ); dataloader->AddVariable( "Occup", "Profesion", "units", 'F' ); dataloader->AddVariable( "Rel", "Relacion", "units", 'F' ); dataloader->AddVariable( "Race", "Raza", "units", 'F' ); dataloader->AddVariable( "Sex", "Genero", "units", 'F' ); // dataloader->AddVariable( "Gain", "Ganancia", "units", 'F' ); // dataloader->AddVariable( "Loss", "Perdida", "units", 'F' ); dataloader->AddVariable( "Hrs", "Hrs/semana", "units", 'F' ); dataloader->AddVariable( "Pais", "Pais", "units", 'F' ); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees dataloader->AddSignalTree ( signaltree, signalWeight ); dataloader->AddBackgroundTree( datatree, backgroundWeight ); // To give different trees for training and testing, do as follows: // // dataloader->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // dataloader->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // ```cpp // // --- begin ---------------------------------------------------------- // std::vector vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signalTree->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; iGetEntries(); i++) { // signalTree->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signalTree->GetEntries()/2.0) dataloader->AddSignalTrainingEvent( vars, signalWeight ); // else dataloader->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; iGetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) dataloader->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else dataloader->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // // --- end ------------------------------------------------------------ // ``` // End of tree registration // Set individual event weights (the variables must exist in the original TTree) // - for signal : `dataloader->SetSignalWeightExpression ("weight1*weight2");` // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");` // dataloader->SetSignalWeightExpression( "weight" ); // dataloader->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the dataloader how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // // To also specify the number of testing events, use: // // dataloader->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" ); // ### Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // Multi-architecture DNN implementation. if (Use["DNN_CPU"] or Use["DNN_GPU"]) { // General layout. TString layoutString ("Layout=TANH|128,TANH|128,TANH|128,LINEAR"); // Training strategies. TString training0("LearningRate=1e-1,Momentum=0.9,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True"); TString training1("LearningRate=1e-2,Momentum=0.9,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); TString training2("LearningRate=1e-3,Momentum=0.0,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2; // General Options. TString dnnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" "WeightInitialization=XAVIERUNIFORM"); dnnOptions.Append (":"); dnnOptions.Append (layoutString); dnnOptions.Append (":"); dnnOptions.Append (trainingStrategyString); // Cuda implementation. if (Use["DNN_GPU"]) { TString gpuOptions = dnnOptions + ":Architecture=GPU"; factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_GPU", gpuOptions); } // Multi-core CPU implementation. if (Use["DNN_CPU"]) { TString cpuOptions = dnnOptions + ":Architecture=CPU"; factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_CPU", cpuOptions); } } // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( dataloader, TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( dataloader, TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTF", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( dataloader, TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // // -------------------------------------------------------------------------------------------------- // Now you can optimize the setting (configuration) of the MVAs using the set of training events // STILL EXPERIMENTAL and only implemented for BDT's ! // // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // // -------------------------------------------------------------------------------------------------- // Now you can tell the factory to train, test, and evaluate the MVAs // // Train MVAs using the set of training events factory->TrainAllMethods(); // Evaluate all MVAs using the set of test events factory->TestAllMethods(); // Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification_socialfinal is done!" << std::endl; delete factory; delete dataloader; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); return 0; } int main( int argc, char** argv ) { // Select methods (don't look at this code - not of interest) TString methodList; for (int i=1; i