/// \file
/// \ingroup tutorial_tmva
/// \notebook
/// TMVA Classification Example Using a Recurrent Neural Network
///
/// This is an example of using a RNN in TMVA. The classification is performed on a
/// time-dependent data set: signal and background samples are read from the input
/// ROOT files opened below.
///
/// \macro_image
/// \macro_output
/// \macro_code
///
/// \author Lorenzo Moneta

/***

# TMVA Classification Example Using a Recurrent Neural Network

This is an example of using a RNN in TMVA. The classification is performed on a data set
containing a time series of `ntime` samples, each of dimension `ninput`, read as signal
and background trees from the input ROOT files opened below.

**/

#include <vector>
#include <string>

#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/Config.h"
#include "TMVA/MethodDL.h"

#include "TFile.h"
#include "TTree.h"

/// macro for performing a classification using a Recurrent Neural Network
/// @param use_type
///    use_type = 0    use Simple RNN network
///    use_type = 1    use LSTM network
///    use_type = 2    use GRU
///    use_type = 3    build 3 different networks with RNN, LSTM and GRU

void TMVA_RNN_Classification(int use_type = 3)
{
   const int ninput = 50;
   const int ntime = 500;
   const int batchSize = 100;
   const int maxepochs = 20;

   int nTotEvts = 10000; // total number of signal (or background) events to use

   bool useKeras = true;

   bool useTMVA_RNN = false;
   bool useTMVA_DNN = false;
   bool useTMVA_BDT = false;

   std::vector<std::string> rnn_types = {"RNN", "LSTM", "GRU"};
   std::vector<int> use_rnn_type = {1, 1, 1};
   if (use_type >= 0 && use_type < 3) {
      use_rnn_type = {0, 0, 0};
      use_rnn_type[use_type] = 1;
   }

   bool useGPU = true; // use GPU for TMVA if available

#ifndef R__HAS_TMVAGPU
   useGPU = false;
#ifndef R__HAS_TMVACPU
   Warning("TMVA_RNN_Classification",
           "TMVA is not built with GPU or CPU multi-thread support. Cannot use the TMVA deep-learning RNN");
   useTMVA_RNN = false;
#endif
#endif

   TString archString = (useGPU) ? "GPU" : "CPU";

   bool writeOutputFile = true;

   const char *rnn_type = "RNN";

#ifdef R__HAS_PYMVA
   TMVA::PyMethodBase::PyInitialize();
#else
   useKeras = false;
#endif

   int num_threads = 0; // use by default all threads
   // do enable MT running
   if (num_threads >= 0) {
      ROOT::EnableImplicitMT(num_threads);
      if (num_threads > 0)
         gSystem->Setenv("OMP_NUM_THREADS", TString::Format("%d", num_threads));
   } else
      gSystem->Setenv("OMP_NUM_THREADS", "1");

   TMVA::Config::Instance();

   std::cout << "Running with nthreads = " << ROOT::GetThreadPoolSize() << std::endl;

   TString inputFileNameSignal = "test_OUT_RNN_OUT_Neutron_AmBe_Complete_Source_Stack.root";
   TString inputFileNameBck = "test_OUT_RNN_OUT_Alphas_Dataset_BiPo.root";

   auto inputFileSignal = TFile::Open(inputFileNameSignal);
   if (!inputFileSignal) {
      Error("TMVA_RNN_Classification", "Error opening input file %s - exit", inputFileNameSignal.Data());
      return;
   }
   auto inputFileBck = TFile::Open(inputFileNameBck);
   if (!inputFileBck) {
      Error("TMVA_RNN_Classification", "Error opening input file %s - exit", inputFileNameBck.Data());
      return;
   }

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName(TString::Format("data_RNN_%s.root", archString.Data()));
   TFile *outputFile = nullptr;
   if (writeOutputFile)
      outputFile = TFile::Open(outfileName, "RECREATE");

   /**
   ## Declare Factory

   Create the Factory class. Later you can choose the methods
   whose performance you'd like to investigate.

   The factory is the major TMVA object you have to interact with.
   Here is the list of parameters you need to pass:

   - The first argument is the base of the name of all the output
     weight files in the directory weight/ that will be created with the
     method parameters.

   - The second argument is the output file for the training results.

   - The third argument is a string option defining some general configuration for the TMVA session.
     For example all TMVA output can be suppressed by removing the "!" (not) in front of the "Silent"
     argument in the option string.

   **/

   // Creating the factory object
   TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile,
                                              "!V:!Silent:Color:DrawProgressBar:Transformations=None:!Correlations:"
                                              "AnalysisType=Classification:ModelPersistence");

   TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");

   TTree *signalTree = (TTree *)inputFileSignal->Get("RNN_Results/RNN_TWFS");
   TTree *background = (TTree *)inputFileBck->Get("RNN_Results/RNN_TWFS");

   const int nvar = ninput * ntime;

   dataloader->AddSignalTree(signalTree, 1.0);
   dataloader->AddBackgroundTree(background, 1.0);

   /// add variables - use the new AddVariablesArray function
   for (auto i = 0; i < ntime; i++) {
      dataloader->AddVariablesArray(Form("RNN_N_WF_%d", i), ninput);
   }

   int nTrainSig = 0.8 * nTotEvts;
   int nTrainBkg = 0.8 * nTotEvts;

   // build the string options for DataLoader::PrepareTrainingAndTestTree
   TString prepareOptions = TString::Format(
      "nTrain_Signal=%d:nTrain_Background=%d:SplitMode=Random:SplitSeed=100:NormMode=NumEvents:!V:!CalcCorrelations",
      nTrainSig, nTrainBkg);

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "";

   dataloader->PrepareTrainingAndTestTree(mycuts, mycutb, prepareOptions);

   std::cout << "prepared DATA LOADER " << std::endl;

   /**
   ## Book TMVA recurrent models

   Book the different types of recurrent models in TMVA (SimpleRNN, LSTM or GRU)

   **/

   if (useTMVA_RNN) {

      for (int i = 0; i < 3; ++i) {

         if (!use_rnn_type[i])
            continue;

         const char *rnn_type = rnn_types[i].c_str();

         /// define the input layout string for the RNN
         /// the input data should be organized as: time x ndim
         TString inputLayoutString = TString::Format("InputLayout=%d|%d", ntime, ninput);

         /// Define the RNN layer layout:
         /// LayerType (RNN, LSTM or GRU) | number of units | number of inputs | time steps |
         /// remember state (typically no = 0) | return the full output sequence (= 1)
         TString rnnLayout = TString::Format("%s|10|%d|%d|0|1", rnn_type, ninput, ntime);

         /// add after the recurrent layer a reshape layer (needed to flatten the output), a dense layer
         /// with 64 units and a final one.
         /// Note the last layer is linear because when using Crossentropy a Sigmoid is applied already
         TString layoutString = TString("Layout=") + rnnLayout + TString(",RESHAPE|FLAT,DENSE|64|TANH,LINEAR");

         /// Define the training strategy. Different training strings can be concatenated with '|';
         /// here only one is used (a commented sketch of a second phase follows below).
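         // Hedged sketch (assumption, not part of the original macro): a second training phase with a
         // smaller learning rate could be concatenated to the first one with '|', for example
         //
         //    TString trainingString2 = TString::Format("LearningRate=1e-4,Momentum=0.0,Repetitions=1,"
         //                                              "ConvergenceSteps=5,BatchSize=%d,TestRepetitions=1,"
         //                                              "WeightDecay=1e-2,Regularization=None,MaxEpochs=%d,"
         //                                              "Optimizer=ADAM,DropConfig=0.0+0.+0.+0.",
         //                                              batchSize, maxepochs);
         //
         // and then, instead of the single append used below,
         //
         //    trainingStrategyString += trainingString1 + "|" + trainingString2;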
         TString trainingString1 = TString::Format("LearningRate=1e-3,Momentum=0.0,Repetitions=1,"
                                                   "ConvergenceSteps=5,BatchSize=%d,TestRepetitions=1,"
                                                   "WeightDecay=1e-2,Regularization=None,MaxEpochs=%d,"
                                                   "Optimizer=ADAM,DropConfig=0.0+0.+0.+0.",
                                                   batchSize, maxepochs);

         TString trainingStrategyString("TrainingStrategy=");
         trainingStrategyString += trainingString1; // + "|" + trainingString2

         /// Define the full RNN option string adding the final options for all networks
         TString rnnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=None:"
                            "WeightInitialization=XAVIERUNIFORM:ValidationSize=0.2:RandomSeed=1234");

         rnnOptions.Append(":");
         rnnOptions.Append(inputLayoutString);
         rnnOptions.Append(":");
         rnnOptions.Append(layoutString);
         rnnOptions.Append(":");
         rnnOptions.Append(trainingStrategyString);
         rnnOptions.Append(":");
         rnnOptions.Append(TString::Format("Architecture=%s", archString.Data()));

         TString rnnName = "TMVA_" + TString(rnn_type);
         factory->BookMethod(dataloader, TMVA::Types::kDL, rnnName, rnnOptions);
      }
   }

   /**
   ## Book TMVA fully connected dense layer models

   **/

   if (useTMVA_DNN) {
      // Method DL with Dense Layer
      TString inputLayoutString = TString::Format("InputLayout=1|1|%d", ntime * ninput);

      // three dense hidden layers with 64 units and TANH activation, followed by the final linear layer
      TString layoutString("Layout=DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,LINEAR");

      // Training strategies.
      TString trainingString1("LearningRate=1e-3,Momentum=0.0,Repetitions=1,"
                              "ConvergenceSteps=10,BatchSize=256,TestRepetitions=1,"
                              "WeightDecay=1e-4,Regularization=None,MaxEpochs=20,"
                              "DropConfig=0.0+0.+0.+0.,Optimizer=ADAM");

      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += trainingString1; // + "|" + trainingString2

      // General Options.
      TString dnnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=None:"
                         "WeightInitialization=XAVIER:RandomSeed=0");

      dnnOptions.Append(":");
      dnnOptions.Append(inputLayoutString);
      dnnOptions.Append(":");
      dnnOptions.Append(layoutString);
      dnnOptions.Append(":");
      dnnOptions.Append(trainingStrategyString);
      dnnOptions.Append(":");
      dnnOptions.Append(TString::Format("Architecture=%s", archString.Data()));

      TString dnnName = "TMVA_DNN";
      factory->BookMethod(dataloader, TMVA::Types::kDL, dnnName, dnnOptions);
   }

   /**
   ## Book Keras recurrent models

   Book the different types of recurrent models in Keras (SimpleRNN, LSTM or GRU)

   **/

   if (useKeras) {

      for (int i = 0; i < 3; i++) {

         if (use_rnn_type[i]) {

            TString modelName = TString::Format("model_%s.h5", rnn_types[i].c_str());
            TString trainedModelName = TString::Format("trained_model_%s.h5", rnn_types[i].c_str());

            Info("TMVA_RNN_Classification", "Building recurrent Keras model using a %s layer", rnn_types[i].c_str());

            // create a python script which can be executed to build the Keras model:
            // Reshape + recurrent layer + Flatten + 2 dense layers
            TMacro m;
            m.AddLine("import keras");
            m.AddLine("from keras.models import Sequential");
            m.AddLine("from keras.optimizers import Adam");
            m.AddLine("from keras.layers import Input, Dense, Dropout, Flatten, SimpleRNN, GRU, LSTM, Reshape, "
                      "BatchNormalization");
            m.AddLine("");
            m.AddLine("model = keras.models.Sequential() ");
            // reshape the flat input variables into the (time steps, dimension) shape expected by the RNN
            m.AddLine(TString::Format("model.add(Reshape((%d, %d), input_shape = (%d*%d, )))",
                                      ntime, ninput, ntime, ninput));
            // add the recurrent layer depending on the type; use the option to return the full output sequence
            if (rnn_types[i] == "LSTM")
               m.AddLine("model.add(LSTM(units=999, return_sequences=True) )");
            else if (rnn_types[i] == "GRU")
               m.AddLine("model.add(GRU(units=999, return_sequences=True) )");
            else
               m.AddLine("model.add(SimpleRNN(units=999, return_sequences=True) )");
            // m.AddLine("model.add(BatchNormalization())");
            m.AddLine("model.add(Flatten())"); // needed when returning the full time output sequence
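            // Hedged alternative (assumption, not part of the original macro): the recurrent layer
            // could instead return only its last output, in which case the Flatten layer above is
            // not needed, e.g.
            //    m.AddLine("model.add(LSTM(units=999, return_sequences=False))");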
            m.AddLine("model.add(Dense(64, activation = 'tanh')) ");
            m.AddLine("model.add(Dense(2, activation = 'sigmoid')) ");
            m.AddLine(
               "model.compile(loss = 'binary_crossentropy', optimizer = Adam(lr = 0.0001), metrics = ['accuracy'])");
            m.AddLine(TString::Format("modelName = '%s'", modelName.Data()));
            m.AddLine("model.save(modelName)");
            m.AddLine("model.summary()");

            m.SaveSource("make_rnn_model.py");
            // execute the script to build and save the Keras model
            gSystem->Exec("python3 make_rnn_model.py");

            if (gSystem->AccessPathName(modelName)) {
               Warning("TMVA_RNN_Classification", "Error creating Keras recurrent model file - skip using Keras");
               useKeras = false;
            } else {
               // book PyKeras method only if the Keras model could be created
               Info("TMVA_RNN_Classification", "Booking Keras %s model", rnn_types[i].c_str());
               factory->BookMethod(dataloader, TMVA::Types::kPyKeras,
                                   TString::Format("PyKeras_%s", rnn_types[i].c_str()),
                                   TString::Format("!H:!V:VarTransform=None:FilenameModel=%s:"
                                                   "FilenameTrainedModel=%s:GpuOptions=allow_growth=True:"
                                                   "NumEpochs=%d:BatchSize=%d",
                                                   modelName.Data(), trainedModelName.Data(), maxepochs, batchSize));
            }
         }
      }
   }

   // book a BDT as a reference in case Keras or the TMVA deep-learning methods are not used
   if (!useKeras || (!useTMVA_RNN && !useTMVA_DNN))
      useTMVA_BDT = true;

   /**
   ## Book TMVA BDT

   **/

   if (useTMVA_BDT) {
      factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDTG",
                          "!H:!V:NTrees=100:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:"
                          "BaggedSampleFraction=0.5:nCuts=20:"
                          "MaxDepth=2");
   }

   /// Train all methods
   factory->TrainAllMethods();

   std::cout << "nthreads = " << ROOT::GetThreadPoolSize() << std::endl;

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // plot the ROC curve of all booked methods
   auto c1 = factory->GetROCCurve(dataloader);
   c1->Draw();

   if (outputFile)
      outputFile->Close();
}
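// Usage sketch (assumption, not part of the original macro): from the command line one would
// typically run, for example,
//    root -l -q 'TMVA_RNN_Classification.C(1)'    (train only the LSTM-based model)
// and then inspect the training results stored in data_RNN_CPU.root (or data_RNN_GPU.root)
// with TMVA::TMVAGui("data_RNN_CPU.root").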