/// root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) /// /// root -l ./TMVAGui.C #include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/Tools.h" #include "TMVA/TMVAGui.h" int tmva_ee2vvh_train_sigbkg( TString myMethodList = "" ) { // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; // Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVATest", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset"); TFile* sigtrainingFile = new TFile("/home/dell/Desktop/ee2vvh2bb_10l_oew_1tev_minus80_20/mva/sigTrainData.root"); TTree *sigTrain = (TTree*)sigtrainingFile->Get("sigTrain"); TFile* sigtestingFile = new TFile("/home/dell/Desktop/ee2vvh2bb_10l_oew_1tev_minus80_20/mva/sigTestData.root"); TTree *sigTest = (TTree*)sigtestingFile->Get("sigTest"); TFile* bkg0trainingFile = new TFile("/home/dell/Desktop/ee2vvh2bb_10l_sm_minus80_20.root/mva/bkgTrainData.root"); TTree *bkg0Train = (TTree*)bkg0trainingFile->Get("bkgTrain"); TFile* bkg0testingFile = new TFile("/home/dell/Desktop/ee2vvh2bb_10l_sm_minus80_20.root/mva/bkgTestData.root"); TTree *bkg0Test = (TTree*)bkg0testingFile->Get("bkgTest"); int nSigTrain = sigTrain->GetEntries(); int nBkg0Train = bkg0Train->GetEntries(); int nSigTest = sigTest->GetEntries(); int nBkg0Test = bkg0Test->GetEntries(); double sigWeight = 2166*0.129*0.15*0.15*0.7*0.7*3000./(nSigTrain+nSigTest); double bkg0Weight = 41.05*1.32*0.7*0.7*1000*3000*0.0041319*1.1/(nBkg0Train+nBkg0Test); dataloader->AddSignalTree(sigTrain, sigWeight, TMVA::Types::kTraining); dataloader->AddBackgroundTree(bkg0Train, bkg0Weight, TMVA::Types::kTraining); dataloader->AddSignalTree(sigTest, sigWeight, TMVA::Types::kTesting); dataloader->AddBackgroundTree(bkg0Test, bkg0Weight, TMVA::Types::kTesting); // dataloader->SetSignalWeightExpression ("sigWeight", 'F' ); // dataloader->SetBackgroundWeightExpression( "weight1" ); dataloader->AddVariable("vvh_Higgs_pt",'F'); dataloader->AddVariable("vvh_Higgs_E",'F'); dataloader->AddVariable("vvh_Higgs_eta",'F'); dataloader->AddVariable("vvh_Higgs_phi",'F'); dataloader->AddVariable("vvh_MissingE",'F'); //dataloader->AddVariable("vvh_mass_nunu",'F'); dataloader->AddVariable("vvh_Higgs_mass",'F'); dataloader->AddVariable("vvh_Afb_costheta",'F'); dataloader->AddVariable("vvh_costheta_higgs",'F'); dataloader->AddVariable("vvh_bjet1_pt",'F'); dataloader->AddVariable("vvh_bjet1_eta",'F'); dataloader->AddVariable("vvh_bjet1_phi",'F'); dataloader->AddVariable("vvh_bjet2_pt",'F'); dataloader->AddVariable("vvh_bjet2_eta",'F'); dataloader->AddVariable("vvh_bjet2_phi",'F'); dataloader->AddVariable("vvh_HT_variable",'F'); // Set individual event weights (the variables must exist in the original TTree) // - for signal : `dataloader->SetSignalWeightExpression ("weight1*weight2");` // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");` // dataloader->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the dataloader how to use the training and testing events dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Cut optimisation if (Use["Cuts"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") // if (Use["Likelihood"]) // factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "Likelihood", // "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( dataloader, TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( dataloader, TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:MinNodeSize=5.4%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5.4%:MaxDepth=2:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTF", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( dataloader, TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // Now you can tell the factory to train, test, and evaluate the MVAs // // Train MVAs using the set of training events factory->TrainAllMethods(); // Evaluate all MVAs using the set of test events factory->TestAllMethods(); // Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; delete dataloader; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); return 0; } int main( int argc, char** argv ) { // Select methods (don't look at this code - not of interest) TString methodList; for (int i=1; i