Dear Users,
I am creating columns on an RDataFrame object df
containing arrays of Lorentz vectors (namely ROOT::VecOps::RVec<PxPyPzEVector>
objects) and then applying filters on them, but this takes a very long time (for an RDataFrame object of 100’000 columns it takes about 30 minutes to apply the filters). After some inspection I realised that the new columns are regenerated every time a filter is applied, which I suppose increases the computation time significantly. My question is whether there is anything I can change (perhaps in the way I am generating those columns) to reduce the running time.
The function used to generate the columns is:
ROOT.gInterpreter.Declare('''
using Vec_t = const ROOT::RVec<float>&;
using namespace ROOT::Math;
/// Build the pT-ordered four-vectors of the selected generator-level leptons of one event.
///
/// A particle is selected when all of the following hold:
///   - Particle_Status == 1;
///   - |Particle_PID| is 11 (electron collection) or 13 (muon collection);
///   - the particle at index Particle_M1 has |PID| equal to 24, 23, 9900012 or 15;
///   - Particle_PT > 10.0;
///   - |Particle_Eta| < 2.47 for |PID| 11, or < 2.40 for |PID| 13.
///
/// Only the collection named by `variable` is filled and sorted, so a call no
/// longer pays for building and sorting the two collections it does not return.
///
/// All Particle_* vectors are read with the same index, so they are presumably
/// of equal length (one entry per particle) -- confirm against the input tree.
///
/// @param variable         Collection to return: "TrueLep" (both flavours),
///                         "TrueEle" (|PID| 11 only) or "TrueMuon" (|PID| 13 only).
///                         Any other value yields an empty vector.
/// @param Particle_PID     Particle ID codes.
/// @param Particle_E       Energies, used as the E component.
/// @param Particle_Px      Px components.
/// @param Particle_Py      Py components.
/// @param Particle_Pz      Pz components.
/// @param Particle_Charge  Unused; kept so existing call sites keep working.
/// @param Particle_Status  Status codes; only entries equal to 1 are considered.
/// @param Particle_M1      Index of the mother particle within the same vectors.
/// @param Particle_PT      Transverse momenta, used for the cut and the ordering.
/// @param Particle_Eta     Pseudorapidities, used for the acceptance cut.
/// @return PxPyPzEVector objects of the selected particles, highest pT first.
ROOT::VecOps::RVec<PxPyPzEVector> TrueLep_Vecs(const std::string &variable, Vec_t Particle_PID, Vec_t Particle_E, Vec_t Particle_Px, Vec_t Particle_Py, Vec_t Particle_Pz, Vec_t Particle_Charge, Vec_t Particle_Status, Vec_t Particle_M1, Vec_t Particle_PT, Vec_t Particle_Eta) {
   ROOT::VecOps::RVec<PxPyPzEVector> selected;
   ROOT::VecOps::RVec<float> selectedPT;

   // Decide up front which flavours the caller asked for.
   const bool wantAll = (variable == "TrueLep");
   const bool wantEle = wantAll || (variable == "TrueEle");
   const bool wantMuon = wantAll || (variable == "TrueMuon");
   if (!wantEle && !wantMuon) {
      return selected; // unknown collection name: empty result
   }

   const std::size_t nParticles = Particle_PID.size();
   for (std::size_t j = 0; j < nParticles; ++j) {
      if (Particle_Status[j] != 1) {
         continue;
      }

      const float absPID = std::fabs(Particle_PID[j]);
      const bool isEle = (absPID == 11);
      const bool isMuon = (absPID == 13);
      if (!(isEle && wantEle) && !(isMuon && wantMuon)) {
         continue;
      }

      // The mother index is stored as a float and may be negative or otherwise
      // outside the vector; using it unchecked would read out of bounds.
      // The negated comparisons also reject NaN.
      const float motherIdx = Particle_M1[j];
      if (!(motherIdx >= 0.0f) || !(motherIdx < static_cast<float>(nParticles))) {
         continue;
      }
      const float absMotherPID = std::fabs(Particle_PID[static_cast<std::size_t>(motherIdx)]);
      const bool motherAccepted = (absMotherPID == 24) || (absMotherPID == 23) ||
                                  (absMotherPID == 9900012) || (absMotherPID == 15);
      if (!motherAccepted) {
         continue;
      }

      // Kinematic acceptance; the eta limit depends on the flavour.
      if (!(Particle_PT[j] > 10.0)) {
         continue;
      }
      const double etaMax = isEle ? 2.47 : 2.40;
      if (!(std::fabs(Particle_Eta[j]) < etaMax)) {
         continue;
      }

      selected.emplace_back(Particle_Px[j], Particle_Py[j], Particle_Pz[j], Particle_E[j]);
      selectedPT.emplace_back(Particle_PT[j]);
   }

   // Argsort gives ascending pT; reversing the taken elements yields descending order.
   return Reverse(Take(selected, Argsort(selectedPT)));
}
''')
I then generate the columns like this:
# Add a 'TrueLep' column whose value is computed by TrueLep_Vecs from the
# Particle.* columns; the first argument selects the "TrueLep" collection.
df = df.Define(
    'TrueLep',
    'TrueLep_Vecs("TrueLep", Particle.PID, Particle.E, Particle.Px, '
    'Particle.Py, Particle.Pz, Particle.Charge, Particle.Status, '
    'Particle.M1, Particle.PT, Particle.Eta)',
)
And finally I apply a filter like this:
# TrueLep only holds the leptons that passed the selection, so an event can
# have fewer than three entries. Check the size first: the && short-circuits,
# so TrueLep[0..2] is never read past the end of the vector.
cut_pT = 'TrueLep.size()>=3 && TrueLep[0].Pt()>25 && TrueLep[1].Pt()>20 && TrueLep[2].Pt()>15'
df = df.Filter(cut_pT)
Any help would be very much appreciated!