Hello All!
I have been struggling with using ROOT::TTreeProcessorMT to process a TTree and fill ROOT::TThreadedObject-wrapped histograms. I had been following the multicore tutorial found here, but I am getting segmentation faults when running the code with my own data. I am posting here because I am not yet confident that this is a bug in ROOT, and I have not been able to test this code on the latest ROOT version yet.
The issue seems to depend on the number of threads I give to ROOT::EnableImplicitMT(): running this on my laptop with 8 cores works flawlessly, whereas running it on my desktop with 32 cores results in the error. The error becomes rarer as I lower the thread count, eventually disappearing at a maximum of around 15 threads. I have included two data files, as the smaller of the two runs perfectly on both machines, whereas the larger one faults at random. I am almost wondering if the issue is memory corruption caused by threads writing into each other's stacks. A sample back-trace is below.
The lines below might hint at the cause of the crash. If you see question
marks as part of the stack trace, try to recompile with debugging information
enabled and export CLING_DEBUG=1 environment variable before running.
You may get help by asking at the ROOT forum https://root.cern/forum
preferably using the command (.forum bug) in the ROOT prompt.
Only if you are really convinced it is a bug in ROOT then please submit a
report at https://root.cern/bugs or (preferably) using the command (.gh bug) in
the ROOT prompt. Please post the ENTIRE stack trace
from above as an attachment in addition to anything else
that might help us fixing this issue.
===========================================================
#9 0x0000000000000115 in ?? ()
#10 0x000055f6d0b5df53 in ROOT::Internal::TTreeReaderArrayBase::UntypedAt(unsigned long) const ()
#11 0x000055f6d0b62a23 in TTreeReaderArray<double>::At(unsigned long) ()
#12 0x000055f6d0b5f489 in TTreeReaderArray<double>::operator[](unsigned long) ()
#13 0x000055f6d0b5bfa1 in main::{lambda(TTreeReader&)#1}::operator()(TTreeReader&) const ()
#14 0x000055f6d0b5d2c7 in void std::__invoke_impl<void, main::{lambda(TTreeReader&)#1}&, TTreeReader&>(std::__invoke_other, main::{lambda(TTreeReader&)#1}&, TTreeReader&) ()
#15 0x000055f6d0b5d1a0 in std::enable_if<is_invocable_r_v<void, main::{lambda(TTreeReader&)#1}&, TTreeReader&>, void>::type std::__invoke_r<void, main::{lambda(TTreeReader&)#1}&, TTreeReader&>(main::{lambda(TTreeReader&)#1}&, TTreeReader&) ()
#16 0x000055f6d0b5d081 in std::_Function_handler<void (TTreeReader&), main::{lambda(TTreeReader&)#1}>::_M_invoke(std::_Any_data const&, TTreeReader&) ()
#17 0x00007f1fff47a9fd in std::function<void (TTreeReader&)>::operator()(TTreeReader&) const (this=0x7fff05ff5cf0, __args#0=...) at /usr/include/c++/11/bits/std_function.h:590
#18 0x00007f1fff473e68 in operator() (__closure=0x7fff05ff4dc0, c=...) at /opt/root_src/tree/treeplayer/src/TTreeProcessorMT.cxx:498
#19 0x00007f1fff474b00 in operator() (__closure=0x7fff05ff4d70, i=18) at /opt/root_src/core/imt/inc/ROOT/TThreadExecutor.hxx:234
#20 0x00007f1fff476f8e in std::__invoke_impl<void, ROOT::TThreadExecutor::Foreach<ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, std::pair<long long int, long long int> >(ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, const std::vector<std::pair<long long int, long long int> >&, unsigned int)::<lambda(unsigned int)>&, unsigned int>(std::__invoke_other, struct {...} &) (__f=...) at /usr/include/c++/11/bits/invoke.h:61
#21 0x00007f1fff476502 in std::__invoke_r<void, ROOT::TThreadExecutor::Foreach<ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, std::pair<long long int, long long int> >(ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, const std::vector<std::pair<long long int, long long int> >&, unsigned int)::<lambda(unsigned int)>&, unsigned int>(struct {...} &) (__fn=...) at /usr/include/c++/11/bits/invoke.h:111
#22 0x00007f1fff4759b0 in std::_Function_handler<void(unsigned int), ROOT::TThreadExecutor::Foreach<ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, std::pair<long long int, long long int> >(ROOT::TTreeProcessorMT::Process(std::function<void(TTreeReader&)>)::<lambda(std::size_t)>::<lambda(const EntryRange&)>, const std::vector<std::pair<long long int, long long int> >&, unsigned int)::<lambda(unsigned int)> >::_M_invoke(const std::_Any_data &, unsigned int &&) (__functor=..., __args#0=
0x7f1fdf067654: 18) at /usr/include/c++/11/bits/std_function.h:290
#23 0x00007f1ffec8845c in std::function<void (unsigned int)>::operator()(unsigned int) const (this=0x7fff05ff4d70, __args#0=18) at /usr/include/c++/11/bits/std_function.h:590
#24 0x00007f1ffec87f49 in tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>::operator()(tbb::detail::d1::blocked_range<unsigned int> const&) const (this=0x7f1fdf50bd50, r=...) at /usr/include/oneapi/tbb/parallel_for.h:208
#25 0x00007f1ffec86e4d in tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>::run_body(tbb::detail::d1::blocked_range<unsigned int>&) (this=0x7f1fdf50bd00, r=...) at /usr/include/oneapi/tbb/parallel_for.h:119
#26 0x00007f1ffec8699f in tbb::detail::d1::dynamic_grainsize_mode<tbb::detail::d1::adaptive_mode<tbb::detail::d1::auto_partition_type> >::work_balance<tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>, tbb::detail::d1::blocked_range<unsigned int> >(tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>&, tbb::detail::d1::blocked_range<unsigned int>&, tbb::detail::d1::execution_data&) (this=0x7f1fdf50bd68, start=..., range=..., ed=...) at /usr/include/oneapi/tbb/partitioner.h:447
#27 0x00007f1ffec867a1 in tbb::detail::d1::partition_type_base<tbb::detail::d1::auto_partition_type>::execute<tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>, tbb::detail::d1::blocked_range<unsigned int> >(tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>&, tbb::detail::d1::blocked_range<unsigned int>&, tbb::detail::d1::execution_data&) (this=0x7f1fdf50bd68, start=..., range=..., ed=...) at /usr/include/oneapi/tbb/partitioner.h:288
#28 0x00007f1ffec86572 in tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<unsigned int>, tbb::detail::d1::parallel_for_body_wrapper<std::function<void (unsigned int)>, unsigned int>, tbb::detail::d1::auto_partitioner const>::execute(tbb::detail::d1::execution_data&) (this=0x7f1fdf50bd00, ed=...) at /usr/include/oneapi/tbb/parallel_for.h:172
#29 0x00007f1ffd8bfb3c in ?? () from /lib/x86_64-linux-gnu/libtbb.so.12
#30 0x00007f1ffd8c1dd8 in ?? () from /lib/x86_64-linux-gnu/libtbb.so.12
#31 0x00007f1ffe86bac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#32 0x00007f1ffe8fd850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
===========================================================
A reproducer of the code can be found on my github here (and below), and the sample data files (one at ~125 MB and the other at 27 GB) can be found on my Google Drive here. Thank you for any assistance you can give!
Reproducer Code:
// C++ Includes
#include <atomic>
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
// ROOT Includes
#include <TFile.h>
#include <TH1D.h>
#include <TH2D.h>
#include <TROOT.h>
#include <TTree.h>
#include <TTreeReader.h>
#include <TTreeReaderArray.h>
#include <ROOT/TThreadedObject.hxx>
#include <ROOT/TTreeProcessorMT.hxx>
// Project Includes
// Number of threads to use for processing; defaults to what the system reports.
// std::thread::hardware_concurrency() returns an unsigned value, converted to int here.
static const int kMaxThreads = static_cast<int>(std::thread::hardware_concurrency());
// Number of bins in the digitizer (16-bit range).
static constexpr int kDigitizerBins = 1 << 16;
// Number of channels in the digitizer.
static constexpr int kDigitizerChannels = 16;
// Conversion factor from digitizer bin to nanoseconds.
static constexpr double kNsPerBin = 0.098;
// Histograms Namespace
namespace Histograms
{
auto cc_amp = ROOT::TThreadedObject<TH2D>("cc_amp", "Clover Cross Amplitude (Raw Data);ADC;Channel;Counts/Bin", kDigitizerBins, 0, kDigitizerBins, kDigitizerChannels, 0, kDigitizerChannels);
auto cc_cht = ROOT::TThreadedObject<TH2D>("cc_cht", "Clover Cross Channel Time (Raw Data);ADC;Channel;Counts/Bin", kDigitizerBins, 0, (kDigitizerBins)*kNsPerBin, kDigitizerChannels, 0, kDigitizerChannels);
auto cc_plu = ROOT::TThreadedObject<TH2D>("cc_plu", "Clover Cross Pile-Up;Pile-Up Multiplicity;Channel;Counts/Bin", 4, 0, 4, kDigitizerChannels, 0, kDigitizerChannels);
auto cc_mdt = ROOT::TThreadedObject<TH1D>("cc_mdt", "Clover Cross Module Time;Time (ns);Counts/Bin", kDigitizerBins, 0, (kDigitizerBins)*kNsPerBin);
auto cc_trt = ROOT::TThreadedObject<TH2D>("cc_trt", "Clover Cross Trigger Time;Time (ns);Trigger ID;Counts/Bin", kDigitizerBins, 0, (kDigitizerBins)*kNsPerBin, 2, 0, 2);
}
// Main function
int main(int argc, char *argv[])
{
// Introduction
if (argc != 3)
{
std::cerr << "Usage: " << argv[0] << " <input filename> <output filename>" << std::endl;
return 1;
}
std::string input_filename = argv[1];
std::string output_filename = argv[2];
std::cout << "===== Welcome to CASort! =====" << std::endl;
std::cout << "----- Current Configuration -----" << std::endl;
std::cout << "Input file: " << input_filename << std::endl;
std::cout << "Output file: " << output_filename << std::endl;
std::cout << "Max Threads: " << kMaxThreads << std::endl;
std::cout << "---------------------------------" << std::endl;
// Set Max Threads
ROOT::EnableImplicitMT(kMaxThreads);
// ROOT::EnableThreadSafety();
// Read the input ROOT file
auto infile = TFile::Open(input_filename.c_str());
if (!infile || infile->IsZombie())
{
std::cerr << "Error opening input file" << std::endl;
return 1;
}
std::cout << "Opened file: " << input_filename << std::endl;
// Create a TTreeReaderMT to read the TTree
std::cout << "Processing events..." << std::endl;
ROOT::TTreeProcessorMT EventProcessor(input_filename.c_str(), "clover");
// Fill Function
auto fillHistograms = [&](TTreeReader &eventReader)
{
TTreeReaderArray<double> cc_amp_val(eventReader, "clover_cross.amplitude");
TTreeReaderArray<double> cc_cht_val(eventReader, "clover_cross.channel_time");
TTreeReaderArray<double> cc_mdt_val(eventReader, "clover_cross.module_timestamp");
TTreeReaderArray<double> cc_plu_val(eventReader, "clover_cross.pileup");
TTreeReaderArray<double> cc_trt_val(eventReader, "clover_cross.trigger_time");
TTreeReaderArray<double> cb_amp_val(eventReader, "clover_back.amplitude");
TTreeReaderArray<double> cb_cht_val(eventReader, "clover_back.channel_time");
TTreeReaderArray<double> cb_mdt_val(eventReader, "clover_back.module_timestamp");
TTreeReaderArray<double> cb_plu_val(eventReader, "clover_back.pileup");
TTreeReaderArray<double> cb_trt_val(eventReader, "clover_back.trigger_time");
TTreeReaderArray<double> ps_amp_val(eventReader, "pos_sig.amplitude");
TTreeReaderArray<double> ps_cht_val(eventReader, "pos_sig.channel_time");
TTreeReaderArray<double> ps_mdt_val(eventReader, "pos_sig.module_timestamp");
TTreeReaderArray<double> ps_plu_val(eventReader, "pos_sig.pileup");
TTreeReaderArray<double> ps_trt_val(eventReader, "pos_sig.trigger_time");
TTreeReaderArray<double> ce_inl_val(eventReader, "cebr_all.integration_long");
TTreeReaderArray<double> ce_cht_val(eventReader, "cebr_all.channel_time");
TTreeReaderArray<double> ce_mdt_val(eventReader, "cebr_all.module_timestamp");
TTreeReaderArray<double> ce_ins_val(eventReader, "cebr_all.integration_short");
TTreeReaderArray<double> ce_trt_val(eventReader, "cebr_all.trigger_time");
auto cc_amp = Histograms::cc_amp.Get();
auto cc_cht = Histograms::cc_cht.Get();
auto cc_plu = Histograms::cc_plu.Get();
auto cc_trt = Histograms::cc_trt.Get();
auto cc_mdt = Histograms::cc_mdt.Get();
// Loop over the entries in the tree
while (eventReader.Next())
{
// Module Time
cc_mdt->Fill(cc_mdt_val[0] * kNsPerBin);
// Trigger Times
cc_trt->Fill(cc_trt_val[0] * kNsPerBin, 0);
cc_trt->Fill(cc_trt_val[1] * kNsPerBin, 1);
// Main Loop
// Detector Loop
for (size_t det = 0; det < 4; det++)
{
// Crystal Loop
for (size_t xtal = 0; xtal < 4; xtal++)
{
auto ch = det * 4 + xtal; // Channel number 0-15
// Raw Histograms
cc_amp->Fill(cc_amp_val[ch], ch);
cc_cht->Fill(cc_cht_val[ch], ch);
cc_plu->Fill(cc_plu_val[ch], ch);
}
}
}
};
// Loop over the entries in the TTree and fill the histograms appropriately
EventProcessor.Process(fillHistograms);
// Save the histograms to a new ROOT file
auto outfile = std::make_unique<TFile>(output_filename.c_str(), "RECREATE");
if (!outfile || outfile->IsZombie())
{
std::cerr << "Error creating output file" << std::endl;
return 1;
}
/* #region Write Histograms */
// Use histograms defined in Histograms.hpp
// Clover Cross Histograms
Histograms::cc_amp.Merge()->Write();
Histograms::cc_cht.Merge()->Write();
Histograms::cc_plu.Merge()->Write();
Histograms::cc_trt.Merge()->Write();
Histograms::cc_mdt.Merge()->Write();
std::cout << "Saved histograms to file: " << outfile->GetName() << std::endl;
outfile->Close();
std::cout << "Done!" << std::endl;
return 0;
}
ROOT Version: 6.32.20 (Built from source)
Platform: linuxx8664gcc (Arch Linux)
Compiler: c++ (GCC) 15.2.1 20251112
CASort.cpp (6.4 KB)