#include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #include "TMVA/Tools.h" #include "TMVA/Factory.h" #include "TMVA/DataLoader.h" #include "TMVA/TMVARegGui.h" using namespace TMVA; void Regression(TString myMethodList = "") { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\‍(\"myMethod1,myMethod2,myMethod3\"\‍) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map Use; // Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // Linear Discriminant Analysis Use["LD"] = 1; // // Function Discriminant analysis Use["FDA_GA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // Neural Network Use["MLP"] = 0; // Deep neural network (with CPU or GPU) #ifdef R__HAS_TMVAGPU Use["DNN_GPU"] = 1; Use["DNN_CPU"] = 0; #else Use["DNN_GPU"] = 0; #ifdef R__HAS_TMVACPU Use["DNN_CPU"] = 1; #else Use["DNN_CPU"] = 0; #endif #endif // // Support Vector Machine Use["SVM"] = 0; // // Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector mlist = gTools().SplitString(myMethodList, ','); for (UInt_t i = 0; i < mlist.size(); i++) { std::string regMethod(mlist[i].Data()); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // Here the preparation phase begins // Create a new root output file TString outfileName("TMVAReg.root"); TFile *outputFile = TFile::Open(outfileName, "RECREATE"); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression"); TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset"); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] Float_t var0, var1, var2, var3, var4, var5, var6, var7, var8, var9, var10; dataloader->AddVariable("var0", 'F'); dataloader->AddVariable("var1", 'F'); dataloader->AddVariable("var2", 'F'); dataloader->AddVariable("var3", 'F'); dataloader->AddVariable("var4", 'F'); dataloader->AddVariable("var5", 'F'); dataloader->AddVariable("var6", 'F'); dataloader->AddVariable("var7", 'F'); dataloader->AddVariable("var8", 'F'); dataloader->AddVariable("var9", 'F'); dataloader->AddVariable("var10", 'F'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // dataloader->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // Add the variable carrying the regression target dataloader->AddTarget("nominal_z"); // It is also possible to declare additional targets for multi-dimensional regression, ie: // factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees TFile *file1 = TFile::Open("./../trees/train_pca.root"); TFile *file2 = TFile::Open("./../trees/test_pca.root"); dataloader->AddRegressionTree(file1->Get("Events")); dataloader->AddRegressionTree(file2->Get("Events")); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the DataLoader to use all remaining events in the trees after training for testing: dataloader->PrepareTrainingAndTestTree(mycut, "SplitMode=Alternate:MixMode=BLock:!V"); // // dataloader->PrepareTrainingAndTestTree( mycut, // "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod(dataloader, TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None"); // And the options strings for the MinMax and RMS methods, respectively: // // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod(dataloader, TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None"); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod(dataloader, TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"); // Linear discriminant if (Use["LD"]) factory->BookMethod(dataloader, TMVA::Types::kLD, "LD"); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod(dataloader, TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3+(5)*x6+(6)*x7+(7)*x8+(8)*x9+(9)*x10:ParRanges=(-100,100);(-100,100);(-100,100);(-100,100);(-100,100);(-100,100);(-100,100);(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=None"); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod(dataloader, TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm"); if (Use["FDA_MT"]) factory->BookMethod(dataloader, TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"); if (Use["FDA_GAMT"]) factory->BookMethod(dataloader, TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod(dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator"); if (Use["DNN_CPU"] || Use["DNN_GPU"]) { TString archOption = Use["DNN_GPU"] ? "GPU" : "CPU"; TString layoutString("Layout=TANH|50,TANH|50,TANH|50,LINEAR"); TString trainingStrategyString("TrainingStrategy="); trainingStrategyString += "LearningRate=1e-3,Momentum=0.3,ConvergenceSteps=20,BatchSize=50,TestRepetitions=1,WeightDecay=0.0,Regularization=None,Optimizer=Adam"; TString nnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=G:WeightInitialization=XAVIERUNIFORM:Architecture="); nnOptions.Append(archOption); nnOptions.Append(":"); nnOptions.Append(layoutString); nnOptions.Append(":"); nnOptions.Append(trainingStrategyString); TString methodName = TString("DNN_") + archOption; factory->BookMethod(dataloader, TMVA::Types::kDL, methodName, nnOptions); // NN } // Support Vector Machine if (Use["SVM"]) factory->BookMethod(dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm"); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30"); if (Use["BDTG"]) factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4"); // -------------------------------------------------------------------------------------------------- // Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // Evaluate all MVAs using the set of test events factory->TestAllMethods(); // Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; //delete factory; //delete dataloader; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVARegGui(outfileName); } int main(int argc, char **argv) { // Select methods (don't look at this code - not of interest) TString methodList; for (int i = 1; i < argc; i++) { TString regMethod(argv[i]); if (regMethod == "-b" || regMethod == "--batch") continue; if (!methodList.IsNull()) methodList += TString(","); methodList += regMethod; } Regression(methodList); return 0; }