Roofit - multiple fit different datasets using different PDFs with some shared parameters

Dear colleagues,

I want to solve the following problem using Roofit. I have N datasets for different observables, each in forms of a RooDataHist, and for each I construct a PDF from a template; the PDF is in form of a RooHistPDF. Each PDF is convoluted with a Gaussian PDF of unknown sigma, and then linearly scaled by a linear transformation x^\\prime = \\alpha x. The parameter \\alpha accounts for a possible mismatch in the absolute scale between the measured dataset and the template.

In this configuration, I thus have 2N free parameters, with each pair (\\alpha_i,\\sigma_i) independent form the other pairs. Clearly, I could execute N independent fits, but my goal, later, will be to model \\sigma with its own PDF, and extract the parameters for the latter.

I report below a snippet of my code.

#histoData is a list of histograms for DATA
#histoMC is a list of histograms for MC (create the PDF from these)
#xmin is a list of minimum x axis
#xmax is a list of maximum x axis

def doFit(histoData,histoMC,xmin,xmax,name,sigma,sigmaMin,sigmaMax,alphaIN=1):

    N=len(histoData)
    
    #create all RooRealVar
    x=[]
    for ii in range (0,N):
        x.append(ROOT.RooRealVar(name+"_x_%i"%ii,name+"_E [GeV]",xmin[ii],xmax[ii]))

    #create a global rooRealVar
    xALL=ROOT.RooArgSet(name+"_x")
    for ii in range(0,N):
        xALL.add(x[ii])
    
    # Define category to distinguish physics and control samples events
    sample = ROOT.RooCategory("sample", "sample")
    for ii in range(0,N):
        sample.defineType("sample_%i"%ii)

    #create all root data hist for DATA, prepare dictionary
    data=[]
    dictData={}
    for ii in range(0,N):
        data.append(ROOT.RooDataHist(name+("_data_%i"%ii),name+("_data_%i"%ii),x[ii],histoData[ii]))
        dictData["range_%i"%ii]=data[ii]
        
    #create the GLOBAL dataset
    dataALL=ROOT.RooDataHist(name+"_data",name+"_data",xALL,Index=sample,Import=dictData)
    
    #create the RooRealVar for MC
    E=[]
    for ii in range(0,N):
        E.append(ROOT.RooRealVar(name+"_E_%i"%ii,name+"_E_%i"%ii, 0, 500));
        E[ii].setBins(10000,"cache")
    #create all RootDataHist for MC
    dh=[]
    dh_draw=[]
    for ii in range(0,N):
        dh.append(ROOT.RooDataHist(name+("_dh_%i"%ii),name+("_dh_%i"%ii),E[ii],histoMC[ii]));
        dh_draw.append(ROOT.RooDataHist(name+("_dh_%i"%ii),name+("_dh_%i"%ii),x[ii],histoMC[ii]));

        
    #create the scale variable for MC - assume a single scale is ok for all energy ranges.
    scale=[]
    for ii in range(0,N):
        scale.append(ROOT.RooRealVar(name+"_scale_%i"%ii,name+"_scale_%i"%ii,0.95,0.5,1.5))
    p0=ROOT.RooRealVar(name+"_p0",name+"_p0",0.)

    Qf=[]
    for ii in range(0,N):
        Qf.append(ROOT.RooPolyVar(name+"_HCAL_F_%i"%ii,name+"_HCAL_F_%i"%ii,x[ii],ROOT.RooArgSet(p0,scale[ii])))

    #create all histo PDFs for MC.
    histpdf=[]
   
    for ii in range(0,N):
        histpdf.append(ROOT.RooHistPdf(name+("_histpdf_%i"%ii),name+("_histpdf_%i"%ii),E[ii],dh[ii]))
    
    
    #TODO: gaussian model
    gauss=[]
    mg=ROOT.RooRealVar(name+"_mg",name+"_mg", 0);
    #sg=ROOT.RooRealVar(name+"_sg_%i"%ii,name+"_sg_%i"%ii,sigma,sigmaMin,sigmaMax)
    #prepare gaus PDFs
    sg=[]
    for ii in range(0,N):
        sg.append(ROOT.RooRealVar(name+"_sg_%i"%ii,name+"_sg_%i"%ii,sigma,sigmaMin,sigmaMax));
        gauss.append(ROOT.RooGaussian(name+"_gauss_%i"%ii,name+"_gauss_%i"%ii,E[ii],mg,sg[ii]))
    

        
    #create all histo PDFs for MC after smearing.
    lxg=[]
    for ii in range(0,N):
        lxg.append(ROOT.RooFFTConvPdf(name+"_lxg_%i"%ii,"histpdf (X) gauss", Qf[ii], E[ii], histpdf[ii], gauss[ii]));

  
      
    #prepare the simultaneous PDF
    simPDF = ROOT.RooSimultaneous(name+"_simPDF",name+"_simPDF",sample)
    for ii in range(0,N):
        simPDF.addPdf(lxg[ii],"sample_%i"%ii)
 
    #do the fit
    simPDF.fitTo(dataALL)
    #prepare ret val
    #retVal=retV(scale.getValV(),scale.getError(),sg.getValV(),sg.getError())
    retVal=None
    #histpdf for drawing
    histpdf_MC=[]
    Qf2=[]
    histpdf_MC_SCALED=[]
    for ii in range(0,N):
        histpdf_MC.append(ROOT.RooHistPdf(name+"_histpdf_MC_%i"%ii,name+"_histpdf_MC_%i"%ii, x[ii], dh_draw[ii], 2));  
        Qf2.append(ROOT.RooPolyVar(name+"_HCAL_f2_%i"%ii, name+"_HCAL_f2_%i"%ii, x[ii], ROOT.RooArgSet(p0, scale[ii])));
        histpdf_MC_SCALED.append(ROOT.RooHistPdf(name+"_histpdf_MC_SCALED_%i"%ii,name+"_histpdf_MC_SCALED_%i"%ii, Qf2[ii], E[ii], dh[ii], 2));

    frames=[]
    for ii in range(0,N):
        frames.append(x[ii].frame())
        data[ii].plotOn(frames[ii])
        lxg[ii].plotOn(frames[ii],LineColor=ROOT.kRed)
        histpdf_MC[ii].plotOn(frames[ii],LineColor=ROOT.kGreen,LineWidth=1);
        histpdf_MC_SCALED[ii].plotOn(frames[ii],LineColor=ROOT.kBlue,LineWidth=1);
    return retVal,frames

It seems to me that something is wrong in this implementation. Indeed, the result I get from the fit for the 4 parameters (N=2 here) is below, note that 0.95 (for scale) and 0.5 (for sigma) are the initial values. It looks to me that roofit is not capable at all to fit the 4 parameters to the dual dataset.


 1  hcal1_scale_0   7.17610e-01   4.18627e-01   0.00000e+00  -1.00167e-01
   2  hcal1_scale_1   9.50000e-01   4.18627e-01   0.00000e+00  -1.00167e-01
   3  hcal1_sg_0   5.00000e-01   2.09162e+00   0.00000e+00  -1.16602e+00
   4  hcal1_sg_1   5.00000e-01   2.09162e+00   0.00000e+00  -1.16602e+00

May I ask you for guidance on this?

Thanks,

Andrea Celentano

Hi Andrea,
Thank you for your question.
@jonas, could you please have a look?

Hi @siliataider, thanks for pinging @jonas - let’s see the reply!