Dear experts,
I know how to use TMVA/PyMVA for training and testing a multiclass classification on a tree with a simple (flat) structure. However, my tree contains a TClonesArray holding the variables I want to train on, so there is a track loop inside the event loop. How do I extract these variables from the tree?
#include <iostream>
#include "TString.h"
#include "TFile.h"
#include "TTree.h"
#include "TSystem.h"
#include "TROOT.h"
#include "TMVA/Factory.h"
#include "TMVA/Reader.h"
#include "TMVA/DataLoader.h"
#include "TMVA/PyMethodBase.h"
// Python script that defines the untrained Keras model: three Dense layers
// (100 tanh units with input_dim=26, 50 tanh units, 3 softmax outputs),
// compiled with categorical cross-entropy and the Adam optimizer, then saved
// to "kerasModelMulticlass.h5". Written as adjacent string literals, which
// the compiler concatenates into one string.
TString pythonSrc =
    "from keras.models import Sequential\n"
    "from keras.layers.core import Dense, Dropout, Activation\n"
    "from keras.optimizers import Adam\n"
    "\n"
    "model = Sequential()\n"
    "model.add(Dense(100, activation=\"tanh\", input_dim=26))\n"
    "model.add(Dense(50, activation=\"tanh\"))\n"
    "model.add(Dense(3, activation=\"softmax\"))\n"
    "model.compile(loss=\"categorical_crossentropy\", optimizer=Adam(), metrics=[\"accuracy\",])\n"
    "model.summary()\n"
    "model.save(\"kerasModelMulticlass.h5\")\n";
int PyKerasMulticlass(){
// Get data file
TFile *input0 = TFile::Open("./TrainingSet/electrons_tree.root");
TFile *input1 = TFile::Open("./TrainingSet/pions_tree.root");
TFile *input2 = TFile::Open("./TrainingSet/muons_tree.root");
// Build model from python file
std::cout << "Generate keras model..." << std::endl;
UInt_t ret;
ret = gSystem->Exec("echo '"+pythonSrc+"' > generateKerasModelMulticlass.py");
if(ret!=0){
std::cout << "[ERROR] Failed to write python code to file" << std::endl;
return 1;
}
ret = gSystem->Exec("ipython generateKerasModelMulticlass.py");
if(ret!=0){
std::cout << "[ERROR] Failed to generate model using python" << std::endl;
return 1;
}
// Setup PyMVA and factory
std::cout << "Setup TMVA..." << std::endl;
TMVA::PyMethodBase::PyInitialize();
TFile* outputFile = TFile::Open("ResultsTestPyKerasMulticlass.root", "RECREATE");
TMVA::Factory *factory = new TMVA::Factory("testPyKerasMulticlass", outputFile,
"!V:Silent:Color:!DrawProgressBar:Transformations=None:AnalysisType=multiclass");
// Load data
TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyKerasMulticlass");
TTree *signal0 = (TTree*)input0->Get("t1");
TTree *signal1 = (TTree*)input1->Get("t1");
TTree *signal2 = (TTree*)input2->Get("t1");
dataloader->AddTree(signal0, "Electrons");
dataloader->AddTree(signal1, "Pions");
dataloader->AddTree(signal2, "Muons");
TClonesArray *track = new TClonesArray("hTrack");
signal0->GetBranch("track")->SetAutoDelete(kFALSE);
signal0->SetBranchAddress("hTrack",&track);
// Is this the right way to do it ???!!
dataloader->AddVariable("track.fXmomentum", &track->fXmomentum);
dataloader->PrepareTrainingAndTestTree("",
"SplitMode=Random:NormMode=NumEvents:!V");
// Book and train method
factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras",
"!H:!V:VarTransform=D,G:FilenameModel=kerasModelMulticlass.h5:FilenameTrainedModel=trainedKerasModelMulticlass.h5:NumEpochs=10:BatchSize=128:SaveBestOnly=false:Verbose=1");
std::cout << "Train model..." << std::endl;
factory->TrainAllMethods();
std::cout << "Test model..." << std::endl;
//factory->TestAllMethods();
std::cout << "Evaluate model..." << std::endl;
//factory->EvaluateAllMethods();
// Clean-up
delete factory;
delete dataloader;
delete outputFile;
return 0;
}
// Program entry point: runs the training and forwards its status code
// (0 on success, 1 on failure) as the process exit code.
int main(){
    return PyKerasMulticlass();
}
Thank you in advance.