Hi all,
I am having problems with the new DataLoader in TMVA. I have 4 files (signal training file, signal test file, background training file and background test file) which I want to use as inputs for a BDT with NTrees=200. The problem is that, depending on whether I add the signal trees or the background trees to the DataLoader first, I get different results. I have tried calling PrepareTrainingAndTestTree, but it has made no difference.
Thanks in advance for any help!
My code looks like:
void TrainData(TString sigFilename, TString bckFilename, TString TupleName, TString *varsName, const int nvars) {
//Remove ".root" from filename for future use
TString sigFileStem = sigFilename.Remove(sigFilename.Length() - 5, 5);
TString bckFileStem = bckFilename.Remove(bckFilename.Length() - 5, 5);
// Create ouput file, factory object and open the input file
TFile* outputFile = TFile::Open( "TMVA_Results_BDT_N=200.root", "RECREATE" );
TMVA::DataLoader* dataloader = new TMVA::DataLoader(".");
TMVA::Factory* factory = new TMVA::Factory("tmvaTest", outputFile, "");
TFile* sigTrainFile = new TFile(sigFileStem + "_Train.root");
TFile* bckTrainFile = new TFile(bckFileStem + "_Train.root");
TFile* sigTestFile = new TFile(sigFileStem + "_Test.root");
TFile* bckTestFile = new TFile(bckFileStem + "_Test.root");
// Get the TTree objects from the input files
TTree* sigTrain = (TTree*)sigTrainFile->Get(TupleName + "_Train");
TTree* bckTrain = (TTree*)bckTrainFile->Get(TupleName + "_Train");
TTree* sigTest = (TTree*)sigTestFile->Get(TupleName + "_Test");
TTree* bckTest = (TTree*)bckTestFile->Get(TupleName + "_Test");
// Get the number of entries in each TTree
int nSigTrain = sigTrain->GetEntries();
int nBckTrain = bckTrain->GetEntries();
int nSigTest = sigTest->GetEntries();
int nBckTest = bckTest->GetEntries();
// Global event weights
double sigWeight = 1.0;
double bckWeight = 1.0;
dataloader->AddBackgroundTree(bckTrain, bckWeight, TMVA::Types::kTraining);
dataloader->AddSignalTree(sigTrain, sigWeight, TMVA::Types::kTraining);
dataloader->AddBackgroundTree(bckTest, bckWeight, TMVA::Types::kTesting);
dataloader->AddSignalTree(sigTest, sigWeight, TMVA::Types::kTesting);
dataloader->PrepareTrainingAndTestTree("", "", "NormMode=None:!V");
/*dataloader->AddSignalTree(sigTrain, sigWeight, TMVA::Types::kTraining);
dataloader->AddBackgroundTree(bckTrain, bckWeight, TMVA::Types::kTraining);
dataloader->AddSignalTree(sigTest, sigWeight, TMVA::Types::kTesting);
dataloader->AddBackgroundTree(bckTest, bckWeight, TMVA::Types::kTesting);
dataloader->PrepareTrainingAndTestTree("", "", "NormMode=None:!V");*/
// Define the input variables that shall be used for the MVA training
// (the variables used in the expression must exist in the original TTree).
for ( int i=0 ; i<nvars ; i++ ) {
dataloader->AddVariable("SelVars_Nominal_" + varsName[i], 'F');
}
// Book MVA methods (see TMVA manual).
factory->BookMethod(dataloader,TMVA::Types::kBDT, "BDT", "NTrees=200:MaxDepth=4:MinNodeSize=5%:nCuts=100:BoostType=AdaBoost:AdaBoostBeta=0.15");
// Train, test and evaluate all methods
factory->TrainAllMethods();
factory->TestAllMethods();
factory->EvaluateAllMethods();
// Save the output and finish up
outputFile->Close();
std::cout << "==> wrote root file TMVA.root" << std::endl;
std::cout << "==> TMVAnalysis is done!" << std::endl;
delete factory;
delete dataloader;
}