Hi,
I am using an RDataFrame to access a root file, create a new variable and then save that new variable to a TTree, along with the original variables in the TTree.
I have managed to do this; however, upon opening the ROOT file, there are two TTrees. One contains the correct number of events, and the other has more events than the original.
EDIT: Of the two TTrees, one shows the correct number of entries and the other shows fewer. This is in line with the solution.
Below is the code modified to keep pertinent information only:
# Sample selection. Each (generator, DSID) pair is combined with every jet
# multiplicity and every flavour to form one sample directory name.
mcGenerator = ["Nominal"]
DSID = ["410470"]
nJets = ["2jet"]
flavours = ["bb"]
rootDir = './1lep/'

# Sample directory names have the form "<generator>_<DSID>_<nJets>_<flavour>".
files = [
    gen + "_" + dsid + "_" + jet + "_" + flav
    for gen, dsid in zip(mcGenerator, DSID)
    for jet in nJets
    for flav in flavours
]

# Branches copied to the output tree; the last entry is the column defined
# below, the others are expected to exist in the input tree.
outputBranches = [
    "EventNumber", "EventNumberModNfold", "EventWeight", "FlavourLabel",
    "MET", "dPhiLBmin", "dPhiVBB", "dRBB", "mBB", "mBBJ", "nJ", "pTB1",
    "pTB2", "pTV", "pTJ3", "mTop", "dYWH", "mTW", "FoldType",
    "EventWeightpTVWeighted",
]

for filePath in glob.glob(rootDir):
    for sample in files:
        # Exactly one ROOT file is expected per sample directory. Index the
        # glob result directly instead of slicing the list's string repr,
        # which silently produced an invalid path for zero or several matches.
        matches = glob.glob(filePath + sample + "/*.root")
        if len(matches) != 1:
            raise RuntimeError(
                "expected exactly one .root file in %r, found %d: %r"
                % (filePath + sample, len(matches), matches)
            )
        fileName = matches[0]

        # uproot is only used to discover the name of the first top-level
        # object in the file, which is assumed to be the TTree of interest.
        inputFile = uproot.open(fileName)
        keys = inputFile.keys(recursive=False)
        if not keys:
            raise RuntimeError("no objects found in %r" % fileName)
        tree = inputFile[keys[0]]

        # Use the object's own name (no ";cycle" suffix, no repr decoration).
        # NOTE(review): older uproot releases return bytes here - confirm the
        # installed version; the decode below covers that case.
        treeName = tree.name
        if isinstance(treeName, bytes):
            treeName = treeName.decode()

        d = ROOT.RDataFrame(treeName, fileName)
        d2 = d.Define("EventWeightpTVWeighted", "EventWeight * ((pTV * pTV)/(75*75))")

        # Snapshot takes the column list as a std::vector<std::string>.
        branchList = ROOT.vector('string')()
        for branchName in outputBranches:
            branchList.push_back(branchName)

        # Write the selected columns to "<sample>_mod.root" under the same
        # tree name as the input.
        d2.Snapshot(treeName, sample + "_mod.root", branchList)