Hi,
I am trying to perform a binned extended ML simultaneous fit to two data sets (2012 and 2016 data) to extract the total signal yield. As you can see from the plot, the normalization is wrong, however I think I am plotting the pdf in the right way so I guess my model is somehow twisted.
I report my code. Any idea?
Thanks,
Jacopo
import ROOT as r
from ROOT import RooFit as rf
def GetDataHist(input_file, finalstate, x, datatype, hist_max = 0.7, Nbins = 7):
f = TFile(input_file, 'read')
t = f.Get('tuple/DecayTree')
Ncand = t.GetEntries()
hist = TH1D("hist","hist",Nbins, 0, hist_max)
for i in range(Ncand):
t.GetEntry(i)
if t.flat_Second3piBDT<=hist_max:
hist.Fill(t.flat_Second3piBDT)
Nentries = hist.GetEntries()
dataHist = r.RooDataHist(datatype, datatype, r.RooArgList(x), hist)
f.Close()
return dataHist, Nentries
finalstate = '3pi3pi'
ntuples = os.environ['NTUPLES'] + '/' + finalstate
strategy = 'KstMass'
bkg = 'OSKstbkg'
x_max = 0.7
Nbins = 7
BDT = 'Second3pi'
signal_12 = ntuples + '/Friends/{Fs}_MC12_truth_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
signal_16 = ntuples + '/Friends/{Fs}_MC16_truth_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
bkg_12 = ntuples + '/Friends/{Fs}_{Bkg}12_3_sel_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,bdt=BDT,Bkg=bkg)
bkg_16 = ntuples + '/Friends/{Fs}_{Bkg}16_3_sel_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,bdt=BDT,Bkg=bkg)
data_12 = ntuples + '/Friends/{Fs}_OS12_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
data_16 = ntuples + '/Friends/{Fs}_OS16_sel_{strat}_sig_flat_{bdt}BDT_{Bkg}_friend.root'.format(Fs=finalstate,strat=strategy,bdt=BDT,Bkg=bkg)
x = r.RooRealVar("x",BDT + "BDT", 0, x_max)
signalHist_12, Nsig12 = GetDataHist(signal_12, finalstate, x, finalstate + " MC12 signal",x_max, Nbins)
signalHist_16, Nsig16 = GetDataHist(signal_16, finalstate, x, finalstate + " MC16 signal",x_max, Nbins)
bkgHist_12, Nbkg12 = GetDataHist(bkg_12, finalstate, x, finalstate + " 2012 bkg",x_max, Nbins)
bkgHist_16, Nbkg16 = GetDataHist(bkg_16, finalstate, x, finalstate + " 2016 bkg",x_max, Nbins)
dataHist_12, Ndata12 = GetDataHist(data_12, finalstate, x, finalstate + " 2012 data",x_max, Nbins)
dataHist_16, Ndata16 = GetDataHist(data_16, finalstate, x, finalstate + " 2016 data",x_max, Nbins)
signalPDF_12 = r.RooHistPdf("signalPDF_12","Signal PDF 2012",r.RooArgSet(x),signalHist_12)
signalPDF_16 = r.RooHistPdf("signalPDF_16","Signal PDF 2016",r.RooArgSet(x),signalHist_16)
bkgPDF_12 = r.RooHistPdf("bkgPDF_12","Bkg PDF 2012",r.RooArgSet(x),bkgHist_12)
bkgPDF_16 = r.RooHistPdf("bkgPDF_16","Bkg PDF 2016",r.RooArgSet(x),bkgHist_16)
frac_12 = r.RooRealVar("frac_12", "Signal fraction 2012", float(Ndata12)/(Ndata12 + Ndata16))
frac_16 = r.RooRealVar("frac_16", "Signal fraction 2016", float(Ndata16)/(Ndata12 + Ndata16))
Normdata_12 = r.RooRealVar("Normdata_12","Data events 2012", Ndata12)
Normdata_16 = r.RooRealVar("Normdata_16","Data events 2016", Ndata16)
Signal_yield = r.RooRealVar("Signal_yield", "Total signal yield", 0, -(Ndata12 + Ndata16), Ndata12 + Ndata16)
Signal_yield.setError(5)
Signal_yield_12 = r.RooFormulaVar("Signal_yield_12", "2012 signal yield", "frac_12*Signal_yield", r.RooArgList(frac_12,Signal_yield))
Signal_yield_16 = r.RooFormulaVar("Signal_yield_16", "2016 signal yield", "frac_16*Signal_yield", r.RooArgList(frac_16,Signal_yield))
Normbkg_12 = r.RooFormulaVar("Normbkg_12","Background events 2012", "Normdata_12-Signal_yield_12", r.RooArgList(Normdata_12, Signal_yield_12))
Normbkg_16 = r.RooFormulaVar("Normbkg_16","Background events 2016", "Normdata_16-Signal_yield_16", r.RooArgList(Normdata_16, Signal_yield_16))
pdf_12 = r.RooAddPdf("pdf_12","PDF signal + bkg 2012", r.RooArgList(signalPDF_12,bkgPDF_12),r.RooArgList(Signal_yield_12,Normbkg_12))
pdf_16 = r.RooAddPdf("pdf_16","PDF signal + bkg 2016", r.RooArgList(signalPDF_16,bkgPDF_16),r.RooArgList(Signal_yield_16,Normbkg_16))
category = r.RooCategory("category","category")
category.defineType("2012")
category.defineType("2016")
model = r.RooSimultaneous("model","",category)
model.addPdf(pdf_12,"2012")
model.addPdf(pdf_16,"2016")
comb_data = r.RooDataHist("comb_data","Combined data", r.RooArgList(x), rf.Index(category), rf.Import("2012",dataHist_12), rf.Import("2016",dataHist_16))
print('****Fitting****')
fit_results = model.fitTo(comb_data, rf.Extended(True), rf.Save(True), rf.Hesse(False) ,rf.Minos(True))
print('****Results****')
fit_results.Print()
frame_2012 = x.frame(rf.Title("2012 dataset"))
comb_data.plotOn(frame_2012,rf.Cut("category==category::2012"))
model.plotOn(frame_2012, rf.Slice(category,"2012"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.LineColor(r.kBlue))
frame_2012.SetMinimum(10)
frame_2012.SetMaximum(10000)
frame_2016 = x.frame(rf.Title("2016 dataset"))
comb_data.plotOn(frame_2016,rf.Cut("category==category::2016"))
model.plotOn(frame_2016, rf.Slice(category,"2016"), rf.ProjWData(r.RooArgSet(category),comb_data), rf.LineColor(r.kBlue))
frame_2016.SetMinimum(10)
frame_2016.SetMaximum(100000)
canv = r.TCanvas("canv","canv", 800, 400)
canv.Divide(2)
canv.cd(1)
canv.GetPad(1).SetLogy()
frame_2012.Draw()
canv.cd(2)
canv.GetPad(2).SetLogy()
frame_2016.Draw()
canv.Print("sim_test.pdf")
sim_test.pdf (18.4 KB)