Dear experts,
I have found that the TTree that I create with the following code contains duplicated entries (two rows sharing the same values in every branch). I have checked that the original TTree from which I construct the new one does not contain duplicated entries so I have to find what part of the code is problematic.
Below I leave the code that I am using (only the relevant parts). What I do is first open one of the trees saved in the original ROOT file, define a few columns, and then store a snapshot in a separate ROOT file for each kinematic bin I am interested in. Then I do the same with the other tree of the original file. After having stored all the snapshots, I trigger them with “Count()” at the end of the script.
I wanted to ask if you see any risky procedure in this code.
import ROOT as R
# Snapshot options passed to the Snapshot() calls booked further down.
options = R.RDF.RSnapshotOptions()
# Lazy snapshots: booking a Snapshot() does not run the event loop; the output
# is only written once a result of the same data frame is requested.
options.fLazy = True
# File-opening mode for the output ROOT file; "RECREATE" starts from a fresh
# file (presumably overwriting any existing one -- same semantics as TFile).
options.fMode = "RECREATE"
print(f"# Processing {Magnet}... #")
histograms[Magnet], table_dict[Magnet] = {}, {}

# Read input nTuples
# NOTE(review): Version is hard-coded to 'v9' here, whereas the output file name
# below and the MCDecayTree chain use the `Version` variable -- confirm this is
# intended, otherwise the output is labelled with a version it was not read from.
# NOTE(review): createTChain is not visible here; if it can add the same input
# file twice (e.g. overlapping file lists), every entry of that file would show
# up twice in the snapshots -- worth checking when hunting duplicated rows.
chain = createTChain(
    Sample, 'raw', Magnet, Version='v9', FullConeInfo=FullConeInfo,
    TreeName=f"{onia}Tuple/DecayTree", kinbin=None, Test=Test,
)
rdf = R.RDataFrame(chain)
if Test and Sample == "RealData":
    # Test runs on real data only look at the first 10000 entries.
    rdf = rdf.Range(10000)

######################################### DECAY TREE ##########################################################
# - Define new variables; no filters applied
print(">>> Binning and storing snapshot DecayTree...")

# --- Output directory and name ntuples
job = Jobs[Sample][Magnet]
# The two naming variants only differ by the optional "_FullConeInfo" tag.
cone_suffix = "_FullConeInfo" if FullConeInfo else ""
value = f"{TEST}{nTuplePrefix[Sample]}_pp{Magnet}2016_{Version}_{Selection['out']['tag']}{cone_suffix}_{job}"

rdf_bin, rdf_snap, nCand, saved_files = {}, {}, {}, {}

# Filter() does not add or remove columns, so the list of columns to save is
# the same for every kinematic bin: compute it once instead of once per bin.
saveCols = [str(i) for i in rdf.GetColumnNames() if i not in bad_cols]

for ybin in _y_kinbin_list:
    yfilter = translateBin(ybin)
    for ptbin in _pt_kinbin_list:
        ptfilter = translateBin(ptbin)
        kinbin = ybin + "_" + ptbin
        newFile = value + "_" + kinbin + ".root"

        # Get candidates in actual bin
        rdf_bin[kinbin] = rdf.Filter(yfilter + " && " + ptfilter)
        # Lazy count: its value is requested later to trigger the event loop.
        nCand[kinbin] = rdf_bin[kinbin].Count()

        print("Storing snapshot " + newFile + "...")
        # NOTE(review): the output tree name is fixed to 'JpsiTuple/DecayTree'
        # although the input tree name depends on `onia` -- confirm.
        rdf_snap[kinbin] = rdf_bin[kinbin].Snapshot(
            'JpsiTuple/DecayTree', outputPath + newFile, saveCols, options
        )
######################################### MC DECAY TREE ##########################################################
if Sample != "RealData":
    print("# Processing MCDecayTree #")
    chainmc = createTChain(
        Sample, 'raw', Magnet, Version=Version, FullConeInfo=FullConeInfo,
        TreeName=f"MC{onia}Tuple/MCDecayTree", kinbin=None, Test=Test,
    )
    mcdt = R.RDataFrame(chainmc)

    print(">>> Binning and storing snapshot MCDecayTree...")
    mcdt_bin, mcdt_snap, nCandMC = {}, {}, {}

    # The MC tree is appended to the per-bin files that the DecayTree snapshots
    # create, hence "UPDATE". Set once here rather than on every iteration.
    # NOTE(review): this mutates the `options` object that was also passed to
    # the still-pending (lazy) DecayTree snapshots. That is only safe if
    # Snapshot() copies the options when it is booked -- verify for the ROOT
    # version in use. It also requires the DecayTree event loop to run before
    # the MC one, so that each file exists before it is opened for update.
    options.fMode = "UPDATE"

    # Filter() does not change the set of columns, so compute the list once.
    saveCols = [str(i) for i in mcdt.GetColumnNames()]

    for ybin in _y_kinbin_list:
        yfilter = translateBin(ybin, rec=False)
        for ptbin in _pt_kinbin_list:
            ptfilter = translateBin(ptbin, rec=False)
            kinbin = ybin + "_" + ptbin
            newFile = value + "_" + kinbin + ".root"

            # Get candidates in actual bin
            mcdt_bin[kinbin] = mcdt.Filter(yfilter + " && " + ptfilter)
            # Lazy count: its value is requested later to trigger the event loop.
            nCandMC[kinbin] = mcdt_bin[kinbin].Count()

            print("Storing snapshot " + newFile + "...")
            mcdt_snap[kinbin] = mcdt_bin[kinbin].Snapshot(
                'MCJpsiTuple/MCDecayTree', outputPath + newFile, saveCols, options
            )
######################################### SAVING ##########################################################
print("# Triggering Snapshot... #")
for ybin in _y_kinbin_list:
    for ptbin in _pt_kinbin_list:
        kinbin = "_".join((ybin, ptbin))

        # Requesting the value of a lazy Count() is what runs the event loop
        # (and with it the lazily booked snapshots); the prints below only
        # report the numbers that were obtained.
        nCandVal = nCand[kinbin].GetValue()
        if Sample != "RealData":
            nCandValMC = nCandMC[kinbin].GetValue()

        print(
            f"nCandidates kinbin {kinbin}: {nCandVal}"
            if nCandVal > 0
            else f"nCandidates kinbin {kinbin}: 0"
        )

        # Real data has no MCDecayTree counterpart to report.
        if Sample == "RealData":
            continue

        print(
            f"nCandidatesMC kinbin {kinbin}: {nCandValMC}"
            if nCandValMC > 0
            else f"nCandidatesMC kinbin {kinbin}: 0"
        )