#include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/Tools.h" #include "TMVA/TMVAGui.h" #include "TMVA/CrossValidation.h" // Cut, k-NN, FDA, Neural Networks, Support vector Machine, Boosted Decision TreeS int tmva1( TString myMethodList = "" ) { //This loads thelibrary TMVA::Tools::Instance(); //Default MVA methods to be trained + tested std::map Use; //-------Cut optimization-------------------------------------- Use["Cuts"] =1; Use["CutsD"] =1; Use["CutsPCA"] =0; Use["CutsGA"] =0; Use["CutsSA"] =0; //-------KNN ----------------------------------------------- Use["KNN"] =1; //------FDA ----------------------------------------------- Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; //-----Neural Networks---------------------------------------- Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN Use["DNN_GPU"] = 0; // CUDA-accelerated DNN training. //----- Support Vector Machine ----------------------------- Use["SVM"] = 0; //---------Boosted Decision Trees---------------------------- Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost // Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 1; //--------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { std::vector mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return 1; } Use[regMethod] = 1; } } //----------Fase de preparacion ---------- // Read training and test data TFile *input(0); TString fname = "recortados.root"; if (!gSystem->AccessPathName(fname)) { input = TFile::Open(fname); //verifica si existe el archivo en la carpeta } if (!input) { std::cout << "ERROR: no se encuentra el archivo" << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; //-------------------------------------------------------------------------------------------------- //Puse el arbol en general y los otros dos que tienen los pesos, me genera duda que en el ejemplo //se tenia dos arboles y los branches que eran de solo señal y de solo background //-------------------------------------------------------------------------------------------------- //TTree *arboldatos = (TTree*)input->Get("Tree"); // Arbol con todos los branches de las variables TTree *signalTree = (TTree*)input->Get("Tree"); //Abro mi arbol con los pesos de señas TTree *background = (TTree*)input->Get("Tree"); // Arbol con los pesos de background TString outfileName( "TMVA1.root" ); // Se crea un archivo .root para guardar los plots que se generen TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "tmva1", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); //La factory para el outfileName TMVA::DataLoader *dataloader=new TMVA ::DataLoader("dataset"); //se crea una carpeta con el nombre dataset, para guardar los distintos archivos //---------------------------------------------------------------------------------------------------- //Se tiene que poner la data a la cual se va a trabajar, de este paso no estoy muy seguro //Las variables son los branches del arbol que tengo //-------------------------------------------------------------------------------------------------- dataloader->AddVariable("P_P", "P_P","F"); //por el momento no pongo todas mientras salgo de dudas //dataloader->AddVariable("P_PlDppi", "P_PlDppi","F"); dataloader->AddVariable("DK_Mass","DK_Mass","D"); dataloader->AddVariable("DPi_Mass","DPi_Mass","F"); // No entiendo para que son los AddSpectator - ya, es para tenerlos a un lado y no alterarlos //global event weights per tree Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; //------------------------------------------------ dataloader->AddSignalTree ( signalTree, signalWeight ); // TTree *signalTree = (TTree*)input->Get("Tree;2/Pesosig"); // para el problema: se prueba dejarlo en la señal, ya que en el peso esta con fatal error dataloader->AddBackgroundTree( background, backgroundWeight ); //--------------------------------------------------------------------------------------------------- //no se bien el caso del AddSignalTree y el AddBackgroundTree, por lo que en el ejemplo lo gestionan con todas //branches del Tree, en este caso yo solo tengo una de señal y de background. //--------------------------------------------------------------------------------------------------- dataloader->SetSignalWeightExpression("Pesosig"); // este es el problema!!!!!!!!!!!!!!! //dataloader->SetWeightExpression("Pesosig"); dataloader->SetBackgroundWeightExpression("Pesobac"); //------------------------------------------------------------------------------------------------------- //Se realiza un Cut para la señal y el background, para el training y el test //puse ese número (114413) por lo que pensaba usar el 50% de training y el 50% en test //------------------------------------------------------------------------------------------------------- TCut mycut = ""; TCut mycuts = ""; TCut mycutb = ""; // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // //dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // hacerlo con CrossValidation dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=150000:nTrain_Background=150000:SplitMode=Random:NormMode=NumEvents:!V" ); //Se crean los factoy BookMethod---------------------------------------------------- // ----los BookMethod del metodo CUT if (Use["Cuts"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // Multi-architecture DNN implementation. if (Use["DNN_CPU"] or Use["DNN_GPU"]) { // General layout. TString layoutString ("Layout=TANH|128,TANH|128,TANH|128,LINEAR"); // Training strategies. TString training0("LearningRate=1e-1,Momentum=0.9,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True"); TString training1("LearningRate=1e-2,Momentum=0.9,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); TString training2("LearningRate=1e-3,Momentum=0.0,Repetitions=1," "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," "WeightDecay=1e-4,Regularization=L2," "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2; // General Options. TString dnnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" "WeightInitialization=XAVIERUNIFORM"); dnnOptions.Append (":"); dnnOptions.Append (layoutString); dnnOptions.Append (":"); dnnOptions.Append (trainingStrategyString); // Cuda implementation. if (Use["DNN_GPU"]) { TString gpuOptions = dnnOptions + ":Architecture=GPU"; factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_GPU", gpuOptions); } // Multi-core CPU implementation. if (Use["DNN_CPU"]) { TString cpuOptions = dnnOptions + ":Architecture=CPU"; factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_CPU", cpuOptions); } } // // Support Vector Machine if (Use["SVM"]) factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTF", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // Now you can tell the factory to train, test, and evaluate the MVAs // // Train MVAs using the set of training events factory->TrainAllMethods(); // Evaluate all MVAs using the set of test events factory->TestAllMethods(); // Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; delete dataloader; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); return 0; }