Hi all,
I’m trying to load several signal and background trees from a loop using python with pyROOT, but I am having some strange error with BookMethod.
This is the message I get:
Factory : Booking method: BDT
:
*** Break *** segmentation violation
===========================================================
There was a crash.
This is the entire stack trace of all threads:
===========================================================
gdb.printing.register_pretty_printer(gdb.current_objfile(),
gdb.printing.register_pretty_printer(gdb.current_objfile(),
Thread 2 (Thread 0x7f7e4fe55700 (LWP 2337572)):
#0 0x00007f7e6556bafb in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0
#1 0x00007f7e6556bb8f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0
#2 0x00007f7e6556bc2b in sem_wait
GLIBC_2.2.5 () from /lib64/libpthread.so.0
#3 0x00007f7e6588f795 in PyThread_acquire_lock () from /lib64/libpython2.7.so.1.0
#4 0x00007f7e6585b2e6 in PyEval_RestoreThread () from /lib64/libpython2.7.so.1.0
#5 0x00007f7e4fe98016 in time_sleep () from /usr/lib64/python2.7/lib-dynload/timemodule.so
#6 0x00007f7e65862d40 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#7 0x00007f7e6586508d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
#8 0x00007f7e657eeabd in function_call () from /lib64/libpython2.7.so.1.0
#9 0x00007f7e657c9ab3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#10 0x00007f7e6585d74d in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#11 0x00007f7e6586270d in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#12 0x00007f7e6586270d in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#13 0x00007f7e6586508d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
#14 0x00007f7e657ee9c8 in function_call () from /lib64/libpython2.7.so.1.0
#15 0x00007f7e657c9ab3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#16 0x00007f7e657d8aa5 in instancemethod_call () from /lib64/libpython2.7.so.1.0
#17 0x00007f7e657c9ab3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#18 0x00007f7e6585b947 in PyEval_CallObjectWithKeywords () from /lib64/libpython2.7.so.1.0
#19 0x00007f7e65893882 in t_bootstrap () from /lib64/libpython2.7.so.1.0
#20 0x00007f7e65565e65 in start_thread () from /lib64/libpthread.so.0
#21 0x00007f7e64b7488d in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7f7e65d6d740 (LWP 2337546)):
#0 0x00007f7e64b3b469 in waitpid () from /lib64/libc.so.6
#1 0x00007f7e64ab8f12 in do_system () from /lib64/libc.so.6
#2 0x00007f7e64ab92c1 in system () from /lib64/libc.so.6
#3 0x00007f7e5d420644 in TUnixSystem::StackTrace() () from /usr/lib64/root/libCore.so.6.18
#4 0x00007f7e5d42248c in TUnixSystem::DispatchSignals(ESignals) () from /usr/lib64/root/libCore.so.6.18
#5 <signal handler called>
#6 0x00007f7e4e825f56 in TMVA::DataInputHandler::GetEntries(std::vector<TMVA::TreeInfo, std::allocator<TMVA::TreeInfo> > const&) const () from /usr/lib64/root/libTMVA.so.6.18
#7 0x00007f7e4e825fac in TMVA::DataInputHandler::GetEntries() const () from /usr/lib64/root/libTMVA.so.6.18
#8 0x00007f7e4e840145 in TMVA::DataSetFactory::BuildInitialDataSet(TMVA::DataSetInfo&, TMVA::DataInputHandler&) () from /usr/lib64/root/libTMVA.so.6.18
#9 0x00007f7e4e8404c7 in TMVA::DataSetFactory::CreateDataSet(TMVA::DataSetInfo&, TMVA::DataInputHandler&) () from /usr/lib64/root/libTMVA.so.6.18
#10 0x00007f7e4e846c6d in TMVA::DataSetManager::CreateDataSet(TString const&) () from /usr/lib64/root/libTMVA.so.6.18
#11 0x00007f7e4e8454ec in TMVA::DataSetInfo::GetDataSet() const () from /usr/lib64/root/libTMVA.so.6.18
#12 0x00007f7e4e8b780a in TMVA::MethodBDT::ProcessOptions() () from /usr/lib64/root/libTMVA.so.6.18
#13 0x00007f7e4e85ac4c in TMVA::Factory::BookMethod(TMVA::DataLoader*, TString, TString, TString) () from /usr/lib64/root/libTMVA.so.6.18
#14 0x00007f7e4e85b864 in TMVA::Factory::BookMethod(TMVA::DataLoader*, TMVA::Types::EMVA, TString, TString) () from /usr/lib64/root/libTMVA.so.6.18
#15 0x00007f7e65bac381 in ?? ()
#16 0x000000000542adf8 in ?? ()
#17 0x00007f7e5942c63c in clang::DiagnosticsEngine::setSeverity(unsigned int, clang::diag::Severity, clang::SourceLocation) () from /usr/lib64/root/libCling.so
#18 0x00000000052be330 in ?? ()
#19 0x00000009053d5608 in ?? ()
#20 0x00007f7e5d3358b0 in ?? () from /usr/lib64/root/libCore.so.6.18
#21 0x00007fffd4fc97a0 in ?? ()
#22 0x0000000004ed9a10 in ?? ()
#23 0x00000000053d0950 in ?? ()
#24 0x00007f7e64e3d760 in ?? () from /lib64/libc.so.6
#25 0x0000000000000030 in ?? ()
#26 0x00007f7e5a29d420 in __bss_start () from /usr/lib64/root/libCling.so
#27 0x00007fffd4fc9590 in ?? ()
#28 0x0000000004f25b30 in ?? ()
#29 0x0000000000000000 in ?? ()
===========================================================
The lines below might hint at the cause of the crash.
You may get help by asking at the ROOT forum http://root.cern.ch/forum
Only if you are really convinced it is a bug in ROOT then please submit a
report at http://root.cern.ch/bugs Please post the ENTIRE stack trace
from above as an attachment in addition to anything else
that might help us fixing this issue.
===========================================================
#6 0x00007f7e4e825f56 in TMVA::DataInputHandler::GetEntries(std::vector<TMVA::TreeInfo, std::allocator<TMVA::TreeInfo> > const&) const () from /usr/lib64/root/libTMVA.so.6.18
#7 0x00007f7e4e825fac in TMVA::DataInputHandler::GetEntries() const () from /usr/lib64/root/libTMVA.so.6.18
#8 0x00007f7e4e840145 in TMVA::DataSetFactory::BuildInitialDataSet(TMVA::DataSetInfo&, TMVA::DataInputHandler&) () from /usr/lib64/root/libTMVA.so.6.18
#9 0x00007f7e4e8404c7 in TMVA::DataSetFactory::CreateDataSet(TMVA::DataSetInfo&, TMVA::DataInputHandler&) () from /usr/lib64/root/libTMVA.so.6.18
#10 0x00007f7e4e846c6d in TMVA::DataSetManager::CreateDataSet(TString const&) () from /usr/lib64/root/libTMVA.so.6.18
#11 0x00007f7e4e8454ec in TMVA::DataSetInfo::GetDataSet() const () from /usr/lib64/root/libTMVA.so.6.18
#12 0x00007f7e4e8b780a in TMVA::MethodBDT::ProcessOptions() () from /usr/lib64/root/libTMVA.so.6.18
#13 0x00007f7e4e85ac4c in TMVA::Factory::BookMethod(TMVA::DataLoader*, TString, TString, TString) () from /usr/lib64/root/libTMVA.so.6.18
#14 0x00007f7e4e85b864 in TMVA::Factory::BookMethod(TMVA::DataLoader*, TMVA::Types::EMVA, TString, TString) () from /usr/lib64/root/libTMVA.so.6.18
#15 0x00007f7e65bac381 in ?? ()
#16 0x000000000542adf8 in ?? ()
#17 0x00007f7e5942c63c in clang::DiagnosticsEngine::setSeverity(unsigned int, clang::diag::Severity, clang::SourceLocation) () from /usr/lib64/root/libCling.so
#18 0x00000000052be330 in ?? ()
#19 0x00000009053d5608 in ?? ()
#20 0x00007f7e5d3358b0 in ?? () from /usr/lib64/root/libCore.so.6.18
#21 0x00007fffd4fc97a0 in ?? ()
#22 0x0000000004ed9a10 in ?? ()
#23 0x00000000053d0950 in ?? ()
#24 0x00007f7e64e3d760 in ?? () from /lib64/libc.so.6
#25 0x0000000000000030 in ?? ()
#26 0x00007f7e5a29d420 in __bss_start () from /usr/lib64/root/libCling.so
#27 0x00007fffd4fc9590 in ?? ()
#28 0x0000000004f25b30 in ?? ()
#29 0x0000000000000000 in ?? ()
===========================================================
Traceback (most recent call last):
File "TMVA/BDTClassification.py", line 180, in <module>
classifier()
File "TMVA/BDTClassification.py", line 160, in classifier
"!H:!V:NTrees=1000:MinNodeSize=2.5%:MaxDepth=6:BoostType=AdaBoost:AdaBoostBeta=0.3:UseBaggedBoost:BaggedSampleFraction=0.3:SeparationType=GiniIndex:nCuts=20" );
TypeError: none of the 3 overloaded methods succeeded. Full details:
TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader* loader, TString theMethodName, TString methodTitle, TString theOption = "") =>
could not convert argument 2
TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader* loader, TMVA::Types::EMVA theMethod, TString methodTitle, TString theOption = "") =>
problem in C++; program state has been reset
TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader*, TMVA::Types::EMVA, TString, TString, TMVA::Types::EMVA, TString) =>
takes at least 6 arguments (4 given)
And here is the code that produces this error:
#!/usr/bin/env python
from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os import listdir
def is_signal(infile):
"""
Method checks if input string is a signal by looking for 'MA' chars in it.
"""
if 'MA' in infile:
return True
else:
return False
def tree_name(infile, path= '/path/to/trees/'):
"""
Method takes a string as input and returns the name of associated tree.
"""
f = infile
if is_signal(f):
out = f.replace(path + 'Out_', 'signal_tree')
out = out.replace('.root', '')
return out
else:
out = f.replace(path + 'Out_', 'background_tree')
out = out.replace('.root', '')
return out
def n_entries(infile):
"""
Method takes a string as an input corresponding to dataset name and returns number of events in dataset.
"""
file = TFile.Open(infile)
tree_name_ = tree_name(infile)
tree = file.Get(tree_name_)
print("Stored {} tree..\n".format(tree_name_))
n = tree.GetEntries()
return n
def total_entries(infiles = []):
"""
Method takes list of string corresponding to dataset names and returns the summation over all the file of the entries.
"""
tot_entries = 0
for file in infiles:
tot_entries += n_entries(file)
return tot_entries
def weight(n_entries, tot_entries):
"""
return weight for classification
"""
w = float((tot_entries*0.5/n_entries))
return w
def background_data(list_file):
"""
Method filters background names from input list and returns them in form of a list
"""
bkg = []
for file in list_file:
if not is_signal(file) and "MSSM" not in file:
bkg.append(file)
return bkg
def signal_data(list_file):
"""
Method filters ALL signal names from input list and returns them in form of a list
"""
sgn = []
for file in list_file:
if is_signal(file):
sgn.append(file)
return sgn
def custom_signal_data(list_file): #MODIFY THIS TO SELECT SUBSETS OF SIGNAL
"""
Method filters desired signal names from input list and returns them in form of a list
"""
sgn = []
for file in list_file:
if 'MA400' in file:
sgn.append(file)
return sgn
def classifier(path = '/path/to/trees/', tmva_output_file_name = 'TMVA_Output.root'):
tmva_output_file =TFile.Open(tmva_output_file_name, 'RECREATE') #Opens TMVA output file. Ovverrides if tmva_output_file_name is not changed.
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
factory = TMVA.Factory('TMVAClassification', tmva_output_file,
'!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')
all_files = listdir(path) #array containing all the file names in path
bkg_data = [path+b for b in background_data(all_files)] #load background dataset
sgn_data = [path+s for s in custom_signal_data(all_files)] #load signal dataset
#TOT_ENTRIES = total_entries(all_files) #uncomment when using all the data.
TOT_ENTRIES = total_entries(bkg_data) #comment this when using all data.
TOT_ENTRIES += total_entries(sgn_data) #comment this when using all data.
print("TOTAL ENTRIES ======> ")
print(TOT_ENTRIES)
variables = ["dimuon_deltar", #Define discrimination variables
"dimuon_deltaphi",
"dimuon_deltaeta",
"met_pt",
"bjet_n",
#"no_btag_jet",
"bjet_1.Pt()",
"bjet_1.Eta()",
#"btag_jet_over2.4",
"deltar_bjet1_dimuon",
"deltapt_bjet1_dimuon",
"deltaeta_bjet1_dimuon"
]
dataloader = TMVA.DataLoader('dataset')
for v in variables:
dataloader.AddVariable(v)
for b in bkg_data:
f = TFile.Open(b)
bkg_tree = f.Get(tree_name(b))
n = bkg_tree.GetEntries()
bkg_weight = weight(n, TOT_ENTRIES)
dataloader.AddBackgroundTree(bkg_tree, bkg_weight)
for s in sgn_data:
f =TFile.Open(s)
sgn_tree = f.Get(tree_name(s))
n = sgn_tree.GetEntries()
sgn_weight = weight(n, TOT_ENTRIES)
dataloader.AddSignalTree(sgn_tree, sgn_weight)
dataloader.PrepareTrainingAndTestTree(TCut(''), "nTrain_Signal=10000:nTrain_Background=100000:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=None:!V")
###################################################################### Book methods ########################################################################
# Generate model
#factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG",
# "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.20:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=6" );
factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT",
"!H:!V:NTrees=1000:MinNodeSize=2.5%:MaxDepth=6:BoostType=AdaBoost:AdaBoostBeta=0.3:UseBaggedBoost:BaggedSampleFraction=0.3:SeparationType=GiniIndex:nCuts=20" );
#factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTB",
# "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
#factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTD",
# "!H:!V:NTrees=1000:MinNodeSize=2.5%:MaxDepth=6:BoostType=AdaBoost:AdaBoostBeta=0.7:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
# Run training, test and evaluation
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
tmva_output_file.close()
print("""
==> Wrote root file: {} \n
==> TMVAClassification is done!\n
""").format(tmva_output_file.GetName())
classifier()
I hope somebody can help me with this.
Many thanks,
William.