/// \file /// \ingroup tutorial_tmva /// \notebook -nodraw /// This macro provides examples for the training and testing of the /// TMVA classifiers. /// /// As input data is used a toy-MC sample consisting of four Gaussian-distributed /// and linearly correlated input variables. /// The methods to be used can be switched on and off by means of booleans, or /// via the prompt command, for example: /// /// root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) /// /// (note that the backslashes are mandatory) /// If no method given, a default set of classifiers is used. /// The output file "TMVA.root" can be analysed with the use of dedicated /// macros (simply say: root -l ), which can be conveniently /// invoked through a GUI that will appear at the end of the run of this macro. /// Launch the GUI via the command: /// /// root -l ./TMVAGui.C /// /// You can also compile and run the example with the following commands /// /// make /// ./TMVAClassification /// /// where: ` = "method1 method2"` are the TMVA classifier names /// example: /// /// ./TMVAClassification Fisher LikelihoodPCA BDT /// /// If no method given, a default set is of classifiers is used /// /// - Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis /// - Package : TMVA /// - Root Macro: TMVAClassification /// /// \macro_output /// \macro_code /// \author Andreas Hoecker #include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/MethodCategory.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/Tools.h" #include "TMVA/TMVAGui.h" int TMVAClassificationCategory(Float_t ptmin, Float_t ptmax, TString suffix = "", Int_t UseCategory = 1) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVAClassificationCategory" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TDatime date; Int_t year = date.GetYear(); Int_t month = date.GetMonth(); Int_t day = date.GetDay(); TString outfileName( Form("TMVA_Lc_%s_%d%02d%02d_ptBin_%.0f_%.0f_12_Category.root", suffix.Data(), year, month, day, ptmin, ptmax )); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification" ); TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset"); dataloader->AddVariable( "massK0S", 'F' ); dataloader->AddVariable( "tImpParBach", 'F' ); dataloader->AddVariable( "tImpParV0", 'F' ); dataloader->AddVariable( "CtK0S := DecayLengthK0S*0.497/v0P", 'F'); dataloader->AddVariable( "cosPAK0S", 'F'); dataloader->AddVariable( "CosThetaStar", 'F'); dataloader->AddVariable( "nSigmaTOFpr", 'F' ); dataloader->AddVariable( "nSigmaTOFpi", 'F' ); dataloader->AddVariable( "nSigmaTOFka", 'F' ); dataloader->AddVariable( "nSigmaTPCpr", 'F' ); dataloader->AddVariable( "nSigmaTPCpi", 'F' ); dataloader->AddVariable( "nSigmaTPCka", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables dataloader->AddSpectator( "massLc2K0Sp", "mass Lc-->K0S+p", "units", 'F' ); dataloader->AddSpectator( "LcPt", "Lc Pt", "units", 'F' ); dataloader->AddSpectator( "V0positivePt", "V0 positive Pt", "units", 'F' ); dataloader->AddSpectator( "V0negativePt", "V0 negative Pt", "units", 'F' ); dataloader->AddSpectator( "v0Pt", "K0S Pt", "units", 'F'); dataloader->AddSpectator( "dcaV0", "dca V0", "units", 'F'); dataloader->AddSpectator( "bachelorEta", "eta bachelor", "units", 'F'); dataloader->AddSpectator( "centrality", "centrality", "units", 'F'); // Read training and test data TString fnameSgn1, fnameSgn2, fnameSgn3; fnameSgn1 = "../treeMC/3002_LHC20I3_P82016/AnalysisResults.root"; fnameSgn2 = "../treeMC/3003_LHC20I3_P82017/AnalysisResults.root"; fnameSgn3 = "../treeMC/3004_LHC20I3_P82018/AnalysisResults.root"; if (gSystem->AccessPathName( fnameSgn1 ) || gSystem->AccessPathName( fnameSgn2 ) || gSystem->AccessPathName( fnameSgn3 )){ // file does not exist in local directory Printf("Signal File for training does not exist, check please, and retry. Now I'll return..."); return 0; } TString fnameBkg1, fnameBkg2, fnameBkg3, fnameBkg4; fnameBkg1 = "../treeData/3520_LHC2016_deghjop/AnalysisResults.root"; fnameBkg2 = "../treeData/3522_LHC2016_kl/AnalysisResults.root"; fnameBkg3 = "../treeData/3521_LHC2017_cefhijklmor/AnalysisResults.root"; fnameBkg4 = "../treeData/3523_LHC2018_bdefghijklmnop/AnalysisResults.root"; if (gSystem->AccessPathName( fnameBkg1 ) || gSystem->AccessPathName( fnameBkg2 ) || gSystem->AccessPathName( fnameBkg3 ) || gSystem->AccessPathName( fnameBkg4 )){ // file does not exist in local directory Printf("Signal File for training does not exist, check please, and retry. Now I'll return..."); return 0; } TFile *inputSgn1 = TFile::Open( fnameSgn1 ); std::cout << "--- TMVAClassification : Using input file: " << inputSgn1->GetName() << std::endl; TFile *inputSgn2 = TFile::Open( fnameSgn2 ); std::cout << "--- TMVAClassification : Using input file: " << inputSgn2->GetName() << std::endl; TFile *inputSgn3 = TFile::Open( fnameSgn3 ); std::cout << "--- TMVAClassification : Using input file: " << inputSgn3->GetName() << std::endl; TFile *inputBkg1 = TFile::Open( fnameBkg1 ); std::cout << "--- TMVAClassification : Using input file: " << inputBkg1->GetName() << std::endl; TFile *inputBkg2 = TFile::Open( fnameBkg2 ); std::cout << "--- TMVAClassification : Using input file: " << inputBkg2->GetName() << std::endl; TFile *inputBkg3 = TFile::Open( fnameBkg3 ); std::cout << "--- TMVAClassification : Using input file: " << inputBkg3->GetName() << std::endl; TFile *inputBkg4 = TFile::Open( fnameBkg4 ); std::cout << "--- TMVAClassification : Using input file: " << inputBkg4->GetName() << std::endl; // --- Register the training and test tree TTree *signal1 = (TTree*)inputSgn1->Get("treeList_0_24_0_24_Sgn"); TTree *signal2 = (TTree*)inputSgn2->Get("treeList_0_24_0_24_Sgn"); TTree *signal3 = (TTree*)inputSgn3->Get("treeList_0_24_0_24_Sgn"); TTree *background1 = (TTree*)inputBkg1->Get("treeList_0_24_0_24_Sgn"); TTree *background2 = (TTree*)inputBkg2->Get("treeList_0_24_0_24_Sgn"); TTree *background3 = (TTree*)inputBkg3->Get("treeList_0_24_0_24_Sgn"); TTree *background4 = (TTree*)inputBkg4->Get("treeList_0_24_0_24_Sgn"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees dataloader->AddSignalTree ( signal1, signalWeight ); dataloader->AddSignalTree ( signal2, signalWeight ); dataloader->AddSignalTree ( signal3, signalWeight ); dataloader->AddBackgroundTree( background1, backgroundWeight ); dataloader->AddBackgroundTree( background2, backgroundWeight ); dataloader->AddBackgroundTree( background3, backgroundWeight ); dataloader->AddBackgroundTree( background4, backgroundWeight ); //dataloader->SetBackgroundWeightExpression( "weight" ); Int_t nSigmaSideBands = 3; Float_t sideBands = 0; if(ptmin==0 && ptmax==1) { sideBands = nSigmaSideBands * 0.0076; } else if(ptmin==1 && ptmax==2) { sideBands = nSigmaSideBands * 0.0076; } else if(ptmin==2 && ptmax==3) { sideBands = nSigmaSideBands * 0.0076; } else if(ptmin==2 && ptmax==4) { sideBands = nSigmaSideBands * 0.0077; } else if(ptmin==3 && ptmax==4) { sideBands = nSigmaSideBands * 0.0079; } else if(ptmin==4 && ptmax==5) { sideBands = nSigmaSideBands * 0.0084; } else if(ptmin==4 && ptmax==6) { sideBands = nSigmaSideBands * 0.0078; } else if(ptmin==5 && ptmax==6) { sideBands = nSigmaSideBands * 0.0090; } else if(ptmin==6 && ptmax==7) { sideBands = nSigmaSideBands * 0.0096; } else if(ptmin==6 && ptmax==8) { sideBands = nSigmaSideBands * 0.0098; } else if(ptmin==7 && ptmax==8) { sideBands = nSigmaSideBands * 0.0101; } else if(ptmin==8 && ptmax==10) { sideBands = nSigmaSideBands * 0.0108; } else if(ptmin==8 && ptmax==12) { sideBands = nSigmaSideBands * 0.0111; } else if(ptmin==10 && ptmax==12) { sideBands = nSigmaSideBands * 0.0118; } else if(ptmin==12 && ptmax==24) { sideBands = nSigmaSideBands * 0.0136; } // Apply additional cuts on the signal and background samples (can be different) TCut mycuts, mycutb; Int_t nTrainingEventsSgn, nTrainingEventsBkg, nTestingEventsBkg; Float_t isFromC = 4; mycuts = Form("LcPt < %f && LcPt > %f && origin == %f", ptmax, ptmin, isFromC); //only prompt mycutb = Form("LcPt < %f && LcPt > %f && abs(massLc2K0Sp - 2.286) > %f", ptmax, ptmin, sideBands); nTrainingEventsSgn = TMath::Min((signal1->GetEntries(mycuts)+signal2->GetEntries(mycuts)+signal3->GetEntries(mycuts)) * 0.25, 250000.); nTrainingEventsBkg = TMath::Min((background1->GetEntries(mycutb)+background2->GetEntries(mycutb)+background3->GetEntries(mycutb)+background4->GetEntries(mycutb)) * 0.3, 250000.); nTestingEventsBkg = TMath::Min((background1->GetEntries(mycutb)+background2->GetEntries(mycutb)+background3->GetEntries(mycutb)+background4->GetEntries(mycutb)) * 0.2, 250000.); dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, Form("nTrain_Signal=%0d:nTest_Signal=%0d:nTrain_Background=%0d:nTest_Background=%0d:SplitMode=Random:NormMode=NumEvents:!V", nTrainingEventsSgn, nTrainingEventsSgn, nTrainingEventsBkg, nTestingEventsBkg) ); //Default Boosted Decision Trees TString methodTitle; // Adaptive Boost methodTitle = Form("BDT_Default_%.0f_%.0f", ptmin, ptmax); Printf("Default ON: methodTitle will be %s", methodTitle.Data()); factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); // Categorised classifier TMVA::MethodCategory* mcat = 0; // The variable sets TString theCat1Vars = "massK0S:tImpParBach:tImpParV0:CtK0S:cosPAK0S:CosThetaStar:nSigmaTOFpr:nSigmaTOFpi:nSigmaTOFka:nSigmaTPCpr:nSigmaTPCpi:nSigmaTPCka"; TString theCat2Vars = (UseCategory ? "massK0S:tImpParBach:tImpParV0:CtK0S:cosPAK0S:CosThetaStar:nSigmaTPCpr:nSigmaTPCpi:nSigmaTPCka" : "massK0S:tImpParBach:tImpParV0:CtK0S:cosPAK0S:CosThetaStar:nSigmaTOFpr:nSigmaTOFpi:nSigmaTOFka:nSigmaTPCpr:nSigmaTPCpi:nSigmaTPCka"); // BDT with categories TMVA::MethodBase* bdtCat = factory->BookMethod( dataloader, TMVA::Types::kCategory, "BdtCat","" ); mcat = dynamic_cast(bdtCat); mcat->AddMethod( "nSigmaTOFpr > -100", theCat1Vars, TMVA::Types::kBDT, "Category_BDT_1", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); mcat->AddMethod( "nSigmaTOFpr <= -100", theCat2Vars, TMVA::Types::kBDT, "Category_BDT_2", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; delete dataloader; return 0; } int main( int argc, char** argv ) { // Select methods (don't look at this code - not of interest) TString methodList; Float_t ptmin, ptmax; Int_t UseCategory; for (int i=1; i