import ROOT
from pathlib import Path

def branch_sizes(filename, treename):
    """Return the compressed size per entry of every top-level branch.

    Opens ``filename`` with ``ROOT.TFile.Open``, fetches ``treename`` from it
    and divides each branch's ``GetZipBytes()`` by the tree's entry count.

    Args:
        filename: File name passed to ``ROOT.TFile.Open``.
        treename: Key of the tree inside the file.

    Returns:
        A ``(sizes, n)`` tuple. ``sizes`` maps branch name to zipped bytes
        per entry (``0.0`` for every branch when the tree has no entries);
        ``n`` is the number of entries as an ``int``.

    Raises:
        RuntimeError: If the file cannot be opened or the tree is not found.
    """
    f = ROOT.TFile.Open(filename)
    if not f or f.IsZombie():
        raise RuntimeError("Cannot open %s" % filename)
    # try/finally so the file is closed even when the tree is missing or an
    # error occurs while walking the branches.
    try:
        t = f.Get(treename)
        if not t:
            raise RuntimeError("Tree %s not found in %s" % (treename, filename))
        n = t.GetEntries()
        sizes = {}
        for br in t.GetListOfBranches():
            # NOTE(review): GetZipBytes() is called without the "*" option, so
            # sub-branches of split branches are presumably not counted --
            # confirm against the TBranch documentation if that matters.
            zipbytes = br.GetZipBytes()
            # Guard against an empty tree to avoid dividing by zero.
            sizes[br.GetName()] = zipbytes / n if n else 0.0
    finally:
        f.Close()
    return sizes, int(n)

# The cached file name embeds the ROOT version with its separators stripped.
# Both "." and "/" are removed: ROOT releases before 6.32 appear to report
# versions such as "6.30/04" (TODO confirm), and a "/" left in the name would
# turn it into a path inside a sub-directory instead of a plain file name.
rversion = ROOT.gROOT.GetVersion().replace('.', '').replace('/', '')
filename = f"data15_{rversion}.root"
file_path = Path(filename)

# Write the filtered file only when it does not exist yet; otherwise the
# existing file is measured as-is.
if not file_path.is_file():
    # Alternative input location kept from the original script:
    # ../OD230226/user.egramsta.OD230226.data15.p6026_output_ntup/user.egramsta.48785775._000523.output_ntup.root
    df = ROOT.RDataFrame("analysis", "user.egramsta.48785775._000523.output_ntup.root")
    df_filter = df.Filter("lep_n>=2", "Requiring at least two leptons")
    df_filter.Snapshot("analysis", filename)

sizesA, nA = branch_sizes(filename, "analysis")

# Largest branches first; branches of equal size stay in alphabetical order.
rows = sorted(sizesA.items(), key=lambda item: (-item[1], item[0]))

# print nicely
print(f"{'branch':60s} {'size/evt [bytes]':>14s}")
print("-"*100)
suma = 0.0
for name, a in rows:
    suma += a
    print(f"{name:60s} {a:14.4f}")
print("-"*100)
print(f"{'Total':60s} {suma:14.2f}")
# nA * suma adds the per-entry branch sizes back up over all entries, so this
# figure reflects the branch sizes reported by branch_sizes() only.
print(f"File Size : {(nA*suma)/1e6:.2f} MB")
