Normalization (or model?) problem in binned ML simultaneous fit

Hi,
I am trying to perform a binned extended ML simultaneous fit to two data sets (2012 and 2016 data) to extract the total signal yield. As you can see from the plot, the normalization is wrong, however I think I am plotting the pdf in the right way so I guess my model is somehow twisted.
I report my code. Any idea?

Thanks,
Jacopo

import ROOT as r
from ROOT import RooFit as rf

def GetDataHist(input_file, finalstate, x, datatype, hist_max = 0.7, Nbins = 7):
    f = TFile(input_file, 'read')
    t = f.Get('tuple/DecayTree')
    Ncand = t.GetEntries()
    hist = TH1D("hist","hist",Nbins, 0, hist_max)
    for i in range(Ncand):
        t.GetEntry(i)       
        if t.flat_Second3piBDT<=hist_max:                                                                                                                              
            hist.Fill(t.flat_Second3piBDT)
    Nentries = hist.GetEntries()
    dataHist = r.RooDataHist(datatype, datatype, r.RooArgList(x), hist)
    f.Close()
    return dataHist, Nentries

finalstate = '3pi3pi'
ntuples = os.environ['NTUPLES'] + '/' + finalstate
strategy = 'KstMass'
bkg = 'OSKstbkg'
x_max = 0.7
Nbins = 7
BDT = 'Second3pi'

signal_12 = ntuples + '/Friends/{Fs}_MC12_truth_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
signal_16 = ntuples + '/Friends/{Fs}_MC16_truth_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
bkg_12 = ntuples + '/Friends/{Fs}_{Bkg}12_3_sel_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,bdt=BDT,Bkg=bkg)
bkg_16 = ntuples + '/Friends/{Fs}_{Bkg}16_3_sel_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,bdt=BDT,Bkg=bkg)
data_12 = ntuples + '/Friends/{Fs}_OS12_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
data_16 = ntuples + '/Friends/{Fs}_OS16_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)

x = r.RooRealVar("x",BDT + "BDT", 0, x_max)

signalHist_12, Nsig12 = GetDataHist(signal_12, finalstate, x, finalstate + " MC12 signal",x_max, Nbins)
signalHist_16, Nsig16 = GetDataHist(signal_16, finalstate, x, finalstate + " MC16 signal",x_max, Nbins)
bkgHist_12, Nbkg12 = GetDataHist(bkg_12, finalstate, x, finalstate + " 2012 bkg",x_max, Nbins)
bkgHist_16, Nbkg16 = GetDataHist(bkg_16, finalstate, x, finalstate + " 2016 bkg",x_max, Nbins)
dataHist_12, Ndata12 = GetDataHist(data_12, finalstate, x, finalstate + " 2012 data",x_max, Nbins)
dataHist_16, Ndata16 = GetDataHist(data_16, finalstate, x, finalstate + " 2016 data",x_max, Nbins)

signalPDF_12 = r.RooHistPdf("signalPDF_12","Signal PDF 2012",r.RooArgSet(x),signalHist_12)
signalPDF_16 = r.RooHistPdf("signalPDF_16","Signal PDF 2016",r.RooArgSet(x),signalHist_16)
bkgPDF_12 = r.RooHistPdf("bkgPDF_12","Bkg PDF 2012",r.RooArgSet(x),bkgHist_12)
bkgPDF_16 = r.RooHistPdf("bkgPDF_16","Bkg PDF 2016",r.RooArgSet(x),bkgHist_16)

frac_12 = r.RooRealVar("frac_12", "Signal fraction 2012", float(Ndata12)/(Ndata12 + Ndata16))
frac_16 = r.RooRealVar("frac_16", "Signal fraction 2016", float(Ndata16)/(Ndata12 + Ndata16))

Normdata_12 = r.RooRealVar("Normdata_12","Data events 2012", Ndata12)
Normdata_16 = r.RooRealVar("Normdata_16","Data events 2016", Ndata16)

Signal_yield = r.RooRealVar("Signal_yield", "Total signal yield", 0, -(Ndata12 + Ndata16), Ndata12 + Ndata16)
Signal_yield.setError(5)

Signal_yield_12 = r.RooFormulaVar("Signal_yield_12", "2012 signal yield", "frac_12*Signal_yield", r.RooArgList(frac_12,Signal_yield))
Signal_yield_16 = r.RooFormulaVar("Signal_yield_16", "2016 signal yield", "frac_16*Signal_yield", r.RooArgList(frac_16,Signal_yield))

Normbkg_12 = r.RooFormulaVar("Normbkg_12","Background events 2012", "Normdata_12-Signal_yield_12", r.RooArgList(Normdata_12, Signal_yield_12))
Normbkg_16 = r.RooFormulaVar("Normbkg_16","Background events 2016", "Normdata_16-Signal_yield_16", r.RooArgList(Normdata_16, Signal_yield_16))

pdf_12 = r.RooAddPdf("pdf_12","PDF signal + bkg 2012", r.RooArgList(signalPDF_12,bkgPDF_12),r.RooArgList(Signal_yield_12,Normbkg_12))
pdf_16 = r.RooAddPdf("pdf_16","PDF signal + bkg 2016", r.RooArgList(signalPDF_16,bkgPDF_16),r.RooArgList(Signal_yield_16,Normbkg_16))

category = r.RooCategory("category","category")
category.defineType("2012")
category.defineType("2016")

model = r.RooSimultaneous("model","",category)
model.addPdf(pdf_12,"2012")
model.addPdf(pdf_16,"2016")

comb_data = r.RooDataHist("comb_data","Combined data", r.RooArgList(x), rf.Index(category), rf.Import("2012",dataHist_12), rf.Import("2016",dataHist_16))

print('****Fitting****')
fit_results = model.fitTo(comb_data, rf.Extended(True), rf.Save(True), rf.Hesse(False) ,rf.Minos(True))
print('****Results****')
fit_results.Print()

frame_2012 = x.frame(rf.Title("2012 dataset"))
comb_data.plotOn(frame_2012,rf.Cut("category==category::2012"))
model.plotOn(frame_2012, rf.Slice(category,"2012"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.LineColor(r.kBlue))                                                                                                                                                       
frame_2012.SetMinimum(10)
frame_2012.SetMaximum(10000)

frame_2016 = x.frame(rf.Title("2016 dataset"))
comb_data.plotOn(frame_2016,rf.Cut("category==category::2016"))
model.plotOn(frame_2016, rf.Slice(category,"2016"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.LineColor(r.kBlue))                                                                                            
frame_2016.SetMinimum(10)
frame_2016.SetMaximum(100000)

canv = r.TCanvas("canv","canv", 800, 400)
canv.Divide(2)
canv.cd(1)
canv.GetPad(1).SetLogy()
frame_2012.Draw()
canv.cd(2)
canv.GetPad(2).SetLogy()
frame_2016.Draw()

canv.Print("sim_test.pdf")

sim_test.pdf (18.4 KB)

Hi @Jacopo,

I could imagine that it’s a problem of plotting bin counts vs. densities. The data are plotted as counts if you don’t do anything, but the PDF might be a density, i.e. it’s normalised by bin width. Let me know if this post helps:

Hi @StephanH,

Indeed I managed to solve the problem using the Normalization option, taking into account the bin width.

Thanks!

Hello @Jacopo,

would you maybe just post the relevant line for future people that might see this post?
Thanks!

I indeed replaced

model.plotOn(frame_2012, rf.Slice(category,"2012"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.LineColor(r.kBlue))

by

model.plotOn(frame_2012, rf.Slice(category,"2012"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.Normalization((Ndata12+Ndata16)/0.07, RooAbsReal.NumEvent) rf.LineColor(r.kBlue))

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.