Dear Experts,
I am experimenting with ROOT parallel tree processing. A skeleton of the macro can be found below; it is based on the imt101_parTreeProcessing.C example macro. I successfully extended it to do my task; it compiles and works well with up to 5 cores. But if I pass a number larger than 5 to ROOT::EnableImplicitMT(nthreads), it still uses only 5 cores. I checked this in top: if nthreads=10 or whatever, only 5 cores are used. I made a plot of the average speed of the task, which is quite heavy to run, and the differences between 1, 2, 3, 4 and 5 cores are really there; however, beyond 5 no change can be seen. The desktop I’m running on has 24 cores.
Of course, there are other ways to parallelize further, e.g., simply in bash, but it would be more convenient to use just one method: ROOT.
I’m using ROOT 6.26/10 on Ubuntu 22.04.2 LTS.
Thanks in advance, Sandor
The Makefile:
# Builds $(TARGET) from $(OBJS) against the ROOT installation under ${ROOTSYS}.
# NOTE: every recipe line below must start with a TAB character, not spaces.
CC=g++
# -pthread is valid for both the compile and the link step.
# NOTE(review): root-config --cflags usually emits its own -std flag; it comes
# later on the command line, so it takes precedence over the one given here.
CPPFLAGS=-std=c++20 -pthread
TARGET=<macroname>
OBJS=<macroname>.o
# Compiler and linker flags reported by ROOT (evaluated by the shell in each recipe).
ROOTLIBS=`${ROOTSYS}/bin/root-config --cflags --glibs`

.PHONY : all clean

# Default goal: build the executable.
all: $(TARGET)

# Link step. HEADERS is not defined in this file; it expands to nothing unless
# it is supplied from the command line or the environment.
$(TARGET): $(OBJS) $(HEADERS)
	$(CC) $(OBJS) $(CPPFLAGS) $(ROOTLIBS) -o $(TARGET)

.SUFFIXES : .o .cc .C

# Compile steps. The original rules used $(FFLAGS), which is never defined,
# so the flags in CPPFLAGS were silently dropped when compiling.
.cc.o :
	$(CC) $(CPPFLAGS) $(ROOTLIBS) -c $<

.C.o :
	$(CC) $(CPPFLAGS) $(ROOTLIBS) -c $<

clean:
	-rm -rf *.o *.d $(TARGET)
#include <chrono>
#include <cstdlib>
#include <future>
#include <iostream>
#include <random>
#include <string>
#include <string_view>
#include <vector>

#include "ROOT/TExecutor.hxx"
#include "ROOT/TThreadedObject.hxx"
#include "ROOT/TTreeProcessorMT.hxx"
#include "TCanvas.h"
#include "TLegend.h"
#include "TROOT.h"
#include "TTreeReader.h"
#include <TF1.h>
#include <TFile.h>
#include <TH2F.h>
#include <TLorentzVector.h>
#include <TTree.h>
#include <TTreeReaderArray.h>
using namespace std;
using namespace std::chrono;
/// Number of input files the program iterates over.
constexpr int NFILE{1000};
// Takes two vectors of TLorentzVector and returns a single TLorentzVector.
// The actual computation is elided in this skeleton (see the placeholder below).
//
// NOTE(review): both arguments are passed by value, so each call copies both
// vectors. If the real body does not modify them, const references would avoid
// the copies -- confirm against the full implementation.
// NOTE(review): as shown, the body contains no return statement; the real
// implementation must return a TLorentzVector on every path.
TLorentzVector signal(vector<TLorentzVector> a, vector<TLorentzVector> b)
{
/* Do stuff */
}
int main(int argc, char** argv)
{
if ( argc < 2 )
{
std::cerr << "Need one argument!" << std::endl;
return 1;
}
auto start1 = high_resolution_clock::now();
TFile * theFile;
int nthreads = atoi(argv[1]);
ROOT::EnableImplicitMT(nthreads);
std::cerr << "Number of threads: " << nthreads << std::endl;
ROOT::TThreadedObject<TH1F> some_histo_merged("some_histo",";)",300,0.6,0.9); some_histo->Sumw2();
for( int ifile = 0 ; ifile < NFILE ; ifile++)
{
ROOT::TThreadedObject<TH1F> some_histo("some_histo",";)",300,0.6,0.9);
some_histo->Sumw2();
ROOT::TTreeProcessorMT tp(Form("<path>","<theTree>");
auto myFunction = [&](TTreeReader &myReader)
{
TTreeReaderArray<double> some_array(myReader, "some_array");
TTreeReaderValue<int> some_int(myReader, "some_int");
while(myReader.Next())
{
/* Do stuff with signal() function and fill some_histo */
}
};
tp.Process(myFunction);
auto MergedHisto = some_histo.Merge();
some_histo_merged->Add((TH1F*)MergedHisto->Clone());
}
TFile * outfile = new TFile("<path>","recreate");
some_histo_merged->Write();
outfile->Close();
delete outfile;
auto stop1 = high_resolution_clock::now();
auto duration1 = duration_cast<microseconds>(stop1 - start1);
std::cout << duration1.count() << std::endl;
return 0;
}