Dear colleagues,
I want to solve the following problem using Roofit. I have N datasets for different observables, each in forms of a RooDataHist
, and for each I construct a PDF from a template; the PDF is in form of a RooHistPDF
. Each PDF is convoluted with a Gaussian PDF of unknown sigma, and then linearly scaled by a linear transformation x^\\prime = \\alpha x. The parameter \\alpha accounts for a possible mismatch in the absolute scale between the measured dataset and the template.
In this configuration, I thus have 2N free parameters, with each pair (\\alpha_i,\\sigma_i) independent form the other pairs. Clearly, I could execute N independent fits, but my goal, later, will be to model \\sigma with its own PDF, and extract the parameters for the latter.
I report below a snippet of my code.
#histoData is a list of histograms for DATA
#histoMC is a list of histograms for MC (create the PDF from these)
#xmin is a list of minimum x axis
#xmax is a list of maximum x axis
def doFit(histoData,histoMC,xmin,xmax,name,sigma,sigmaMin,sigmaMax,alphaIN=1):
N=len(histoData)
#create all RooRealVar
x=[]
for ii in range (0,N):
x.append(ROOT.RooRealVar(name+"_x_%i"%ii,name+"_E [GeV]",xmin[ii],xmax[ii]))
#create a global rooRealVar
xALL=ROOT.RooArgSet(name+"_x")
for ii in range(0,N):
xALL.add(x[ii])
# Define category to distinguish physics and control samples events
sample = ROOT.RooCategory("sample", "sample")
for ii in range(0,N):
sample.defineType("sample_%i"%ii)
#create all root data hist for DATA, prepare dictionary
data=[]
dictData={}
for ii in range(0,N):
data.append(ROOT.RooDataHist(name+("_data_%i"%ii),name+("_data_%i"%ii),x[ii],histoData[ii]))
dictData["range_%i"%ii]=data[ii]
#create the GLOBAL dataset
dataALL=ROOT.RooDataHist(name+"_data",name+"_data",xALL,Index=sample,Import=dictData)
#create the RooRealVar for MC
E=[]
for ii in range(0,N):
E.append(ROOT.RooRealVar(name+"_E_%i"%ii,name+"_E_%i"%ii, 0, 500));
E[ii].setBins(10000,"cache")
#create all RootDataHist for MC
dh=[]
dh_draw=[]
for ii in range(0,N):
dh.append(ROOT.RooDataHist(name+("_dh_%i"%ii),name+("_dh_%i"%ii),E[ii],histoMC[ii]));
dh_draw.append(ROOT.RooDataHist(name+("_dh_%i"%ii),name+("_dh_%i"%ii),x[ii],histoMC[ii]));
#create the scale variable for MC - assume a single scale is ok for all energy ranges.
scale=[]
for ii in range(0,N):
scale.append(ROOT.RooRealVar(name+"_scale_%i"%ii,name+"_scale_%i"%ii,0.95,0.5,1.5))
p0=ROOT.RooRealVar(name+"_p0",name+"_p0",0.)
Qf=[]
for ii in range(0,N):
Qf.append(ROOT.RooPolyVar(name+"_HCAL_F_%i"%ii,name+"_HCAL_F_%i"%ii,x[ii],ROOT.RooArgSet(p0,scale[ii])))
#create all histo PDFs for MC.
histpdf=[]
for ii in range(0,N):
histpdf.append(ROOT.RooHistPdf(name+("_histpdf_%i"%ii),name+("_histpdf_%i"%ii),E[ii],dh[ii]))
#TODO: gaussian model
gauss=[]
mg=ROOT.RooRealVar(name+"_mg",name+"_mg", 0);
#sg=ROOT.RooRealVar(name+"_sg_%i"%ii,name+"_sg_%i"%ii,sigma,sigmaMin,sigmaMax)
#prepare gaus PDFs
sg=[]
for ii in range(0,N):
sg.append(ROOT.RooRealVar(name+"_sg_%i"%ii,name+"_sg_%i"%ii,sigma,sigmaMin,sigmaMax));
gauss.append(ROOT.RooGaussian(name+"_gauss_%i"%ii,name+"_gauss_%i"%ii,E[ii],mg,sg[ii]))
#create all histo PDFs for MC after smearing.
lxg=[]
for ii in range(0,N):
lxg.append(ROOT.RooFFTConvPdf(name+"_lxg_%i"%ii,"histpdf (X) gauss", Qf[ii], E[ii], histpdf[ii], gauss[ii]));
#prepare the simultaneous PDF
simPDF = ROOT.RooSimultaneous(name+"_simPDF",name+"_simPDF",sample)
for ii in range(0,N):
simPDF.addPdf(lxg[ii],"sample_%i"%ii)
#do the fit
simPDF.fitTo(dataALL)
#prepare ret val
#retVal=retV(scale.getValV(),scale.getError(),sg.getValV(),sg.getError())
retVal=None
#histpdf for drawing
histpdf_MC=[]
Qf2=[]
histpdf_MC_SCALED=[]
for ii in range(0,N):
histpdf_MC.append(ROOT.RooHistPdf(name+"_histpdf_MC_%i"%ii,name+"_histpdf_MC_%i"%ii, x[ii], dh_draw[ii], 2));
Qf2.append(ROOT.RooPolyVar(name+"_HCAL_f2_%i"%ii, name+"_HCAL_f2_%i"%ii, x[ii], ROOT.RooArgSet(p0, scale[ii])));
histpdf_MC_SCALED.append(ROOT.RooHistPdf(name+"_histpdf_MC_SCALED_%i"%ii,name+"_histpdf_MC_SCALED_%i"%ii, Qf2[ii], E[ii], dh[ii], 2));
frames=[]
for ii in range(0,N):
frames.append(x[ii].frame())
data[ii].plotOn(frames[ii])
lxg[ii].plotOn(frames[ii],LineColor=ROOT.kRed)
histpdf_MC[ii].plotOn(frames[ii],LineColor=ROOT.kGreen,LineWidth=1);
histpdf_MC_SCALED[ii].plotOn(frames[ii],LineColor=ROOT.kBlue,LineWidth=1);
return retVal,frames
It seems to me that something is wrong in this implementation. Indeed, the result I get from the fit for the 4 parameters (N=2 here) is below, note that 0.95 (for scale) and 0.5 (for sigma) are the initial values. It looks to me that roofit is not capable at all to fit the 4 parameters to the dual dataset.
1 hcal1_scale_0 7.17610e-01 4.18627e-01 0.00000e+00 -1.00167e-01
2 hcal1_scale_1 9.50000e-01 4.18627e-01 0.00000e+00 -1.00167e-01
3 hcal1_sg_0 5.00000e-01 2.09162e+00 0.00000e+00 -1.16602e+00
4 hcal1_sg_1 5.00000e-01 2.09162e+00 0.00000e+00 -1.16602e+00
May I ask you for guidance on this?
Thanks,
Andrea Celentano