// @(#)root/tmva $Id$ /********************************************************************************** * Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis * * Package : TMVA * * Root Macro: TMVAClassification * * * * This macro provides examples for the training and testing of the * * TMVA classifiers. * * * * As input data is used a toy-MC sample consisting of four Gaussian-distributed * * and linearly correlated input variables. * * * * The methods to be used can be switched on and off by means of booleans, or * * via the prompt command, for example: * * * * root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) * * * * (note that the backslashes are mandatory) * * If no method given, a default set of classifiers is used. * * * * The output file "TMVA.root" can be analysed with the use of dedicated * * macros (simply say: root -l ), which can be conveniently * * invoked through a GUI that will appear at the end of the run of this macro. * * Launch the GUI via the command: * * * * root -l ./TMVAGui.C * * * * You can also compile and run the example with the following commands * * * * make * * ./TMVAClassification * * * * where: = "method1 method2" * * are the TMVA classifier names * * * * example: * * ./TMVAClassification Fisher LikelihoodPCA BDT * * * * If no method given, a default set is of classifiers is used * **********************************************************************************/ #include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Factory.h" #include "TMVA/Tools.h" #include "TMVA/TMVAGui.h" int TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 1; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN Use["SVM"] = 0; Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting Use["RuleFit"] = 0; std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return 1; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,"!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification" ); factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); TString fname = "/afs/cern.ch/work/m/mwojtas/Phys/RareDecay/r_lambda/TrainOutput/InputFile.root"; TFile *input = TFile::Open( fname ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *tree = (TTree*)input->Get("TestTree"); float var[3]; std::vector vars(3); int SigBck; TRandom *r0 = new TRandom(); tree->SetBranchAddress("Proton_PT",&var[0]); tree->SetBranchAddress("Kaon_PT",&var[1]); tree->SetBranchAddress("Lb_PT",&var[2]); tree->SetBranchAddress("classID",&SigBck); for (UInt_t i=0; iGetEntries(); i++) { tree->GetEntry(i); for (int i=0;i<3;++i) { vars[i]=var[i]; } double MyRand = r0->Rndm(); if( SigBck == 0 ) { if(MyRand<0.8) { factory->AddSignalTrainingEvent( vars, 1 ); } else { factory->AddSignalTestEvent( vars, 1 ); } } else { if(MyRand<0.8) { factory->AddBackgroundTrainingEvent( vars, 1 ); } else { factory->AddBackgroundTestEvent( vars, 1 ); } } } TCut mycuts,mycutb; factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); if (Use["MLPBFGS"]) { TString randomize = ""; for (int i = 0; i<2; ++i) { randomize += i; factory->BookMethod(TMVA::Types::kMLP,"MLP_"+randomize,"!H:!V:ConvergenceImprove=1e-4:ConvergenceTests=20:TestRate=5:Sampling=0.1:SamplingEpoch=100:SamplingImportance=2:Tau=3:HiddenLayers=60,40:VarTransform= G,D,Norm:NCycles= 10 :NeuronType= sigmoid:TrainingMethod=BFGS :UseRegulator=False:EstimatorType=MSE:RandomSeed=0");// <-----DOES NOT WORK ON THE SECOND REPETITION randomize=""; } factory->BookMethod( TMVA::Types::kMLP, "MLP_2", "!H:!V:NeuronType=sigmoid:VarTransform=G,N:NCycles=10:HiddenLayers=10,5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );//<----DOES not work if loop is run first factory->BookMethod( TMVA::Types::kMLP, "MLP_3", "!H:!V:NeuronType=sigmoid:VarTransform=G,N:NCycles=10:HiddenLayers=10,5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );// <--- Works only if the loop is run first } // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); return 0; } int main( int argc, char** argv ) { // Select methods (don't look at this code - not of interest) TString methodList; for (int i=1; i