RooHypatia2 normalization problem

Dongliang · June 22, 2022, 4:05am

Dear experts,
I try to use RooHypatia2 to fit the sum of two observables and get a normalization problem as shown in the attached figure. My script is also attached.
tt1ss.py (1.6 KB)
Is it correct to use the RooFit.ProjWData(data) normalization option? It gives good normalization if I use Gaussian or CrystalBall instead of RooHypatia2…

Thanks. The version I’m using is 6.26/02.

eguiraud · June 22, 2022, 9:42am

Hi @Dongliang ,

let’s ping our RooFit expert @jonas .

jonas · June 22, 2022, 12:51pm

Hi @Dongliang,

thanks for the question! I’m surprised that this code works at all!

You have these definitions here:

w.factory('xm[670,650,720]')
w.factory('dm[900,875,995]')
w.factory('expr::bm("xm+dm",xm,dm)')

And then you have a pdf for xm and one for bm, and even though bm is a derived quantity you still manage to generate data for the original xm and dm and fit and plot at least something! Quite surprising, given that there is no direct pdf for dm (it’s only implied in the relation between them, bm=xm+dm).

However, I don’t think RooFit was designed to handle this and that it worked with the Gaussian and CrystalBall was probably just luck (they have analytical integrals implemented, maybe numeric integrals don’t work here).

Would it be a problem to rewrite your code such that xm and bm are the fundamental variables? That’s more how RooFit is intended to be used:

from ROOT import gROOT, TFile, TCanvas, RooAbsReal, RooArgSet, RooWorkspace, RooFit


def test():
    w = RooWorkspace("w")
    w.factory("xm[670,650,720]")
    w.factory("bm[1570, 1525, 1715]")
    w.factory('expr::dm("bm-xm",bm,xm)')
    w.factory(
        "Hypatia2::tPdf(bm,l[-4.07,-7,0],zeta[0,0,10],fb[0],sigma[5.,0.1,10],mean[1620,1615,1625],aL[3.06,0,9],nL[2.43,0,9],aR[4.37,0,12],nR[8.08,0.1,12])"
    )
    w.factory("Chebychev::xPdf(xm, {a1p[0.9,0,1],a2p[0.1,0,0.5]})")
    w.factory("PROD::model(tPdf, xPdf)")
    xm = w["xm"]
    dm = w["dm"]
    bm = w["bm"]

    tPdf = w.pdf("tPdf")
    model = w.pdf("model")

    obs = RooArgSet(xm, bm)
    data = model.generate(obs, 50000)

    bframe = bm.frame(Bins=100, Range=(1560, 1680))
    data.plotOn(bframe)

    tPdf.plotOn(bframe)
    bframe.Draw()

    a = input("wait...")


if __name__ == "__main__":
    test()

With this, I also don’t get the normalization problem.

Bonus tips:

In the pyROOT 6.26, you can directly do ws["x"] to correctly retrieve any object, no matter the type.
Also, you can use Python keyword arguments instead of RooFit command arguments (see the call to frame in my snippet)
No ProjWData is necessary, because in your case the pdf for one observable doesn’t depend on the other observable. Only if you plot a pdf that is conditional on another observable you need to do a projection.

Hope this helps!

Jonas

Dongliang · June 23, 2022, 4:43am

Hi @jonas ,
Thanks a lot for the reply! I’m trying to use xm and dm as fundamental variables because my signal peaks in these two variables. The model pdf in the test script is used to describe a background which peaks in xm+dm only. I guess model, as a product of tPdf and xPdf, is still a valid pdf for xm and dm? It seems working in this test script:

#!/usr/bin/env python3
from ROOT import gROOT, TFile, TCanvas, RooAbsReal, RooArgSet, RooWorkspace, RooFit

def test():
    w = RooWorkspace('w')
    w.factory('xm[670,650,720]')
    w.factory('dm[900,875,995]')
    w.factory('expr::bm("xm+dm",xm,dm)')
#     w.factory('Gaussian::tPdf(bm,mean[1620,1615,1625],sigma[5,0.1,25])')
#     w.factory('CrystalBall::tPdf(bm,mean[1620,1615,1625],sigma[5.,0.1,10],sigma,aL[3.06,0.1,9],nL[2.43,0.1,9],aR[4.37,0.1,12],nR[8.08,0.1,12])')
    w.factory('Hypatia2::tPdf(bm,l[-4.,-9,-0.6],zeta[0,0,10],fb[0],sigma[15.,0.1,20],mean[1620,1615,1625],aL[100,0.1,200],nL[2.43,0.1,10],aR[100,0.1,200],nR[8.0,0.1,10])')
    w.factory('Chebychev::xPdf(xm, {a1p[0.9,0,1],a2p[0.1,0,0.5]})')
    w.factory('PROD::model(tPdf, xPdf)')
    w['zeta'].setConstant(True)
    w['aL'].setConstant(True)
    w['nL'].setConstant(True)
    w['aR'].setConstant(True)
    w['nR'].setConstant(True)
    xm = w['xm']
    dm = w['dm']
    bm = w['bm']

    tPdf = w.pdf('tPdf')
    model = w.pdf('model')

    obs = RooArgSet(xm,dm)
    data = model.generate(obs, 50000)
    bmv = data.addColumn(bm)
#     tPdf.fitTo(data)

#     bframe = bmv.frame(RooFit.Bins(100), RooFit.Range(1560,1680))
#     data.plotOn(bframe)
# 
# #     tPdf.plotOn(bframe)
#     tPdf.plotOn(bframe, RooFit.ProjWData(data))
# #     tPdf.plotOn(bframe,RooFit.Normalization(data.sumEntries(), RooAbsReal.NumEvent))
# #     tPdf.plotOn(bframe,RooFit.Normalization(1.0, RooAbsReal.RelativeExpected))
# #     tPdf.plotOn(bframe, RooFit.Normalization(1., RooAbsReal.Relative))
#     bframe.Draw()

    w['a1p'].setVal(0.5)
    w['a2p'].setVal(0.2)
    w['mean'].setVal(1624)
    w['sigma'].setVal(9)
    w['l'].setVal(-1.5)

    rf = model.fitTo(data,Save=True)
    rf.Print("v")

    c1 = TCanvas()
    c1.Divide(3)
    c1.cd(1)
    xm = w.var('xm')
    xframe = xm.frame()
    data.plotOn(xframe)
    model.plotOn(xframe)
    xframe.Draw()

    c1.cd(2)
    dm = w.var('dm')
    dframe = dm.frame()
    data.plotOn(dframe)
    model.plotOn(dframe)
    dframe.Draw()

    c1.cd(3)
    bframe = bmv.frame(Bins=100,Range=(1560,1680))
    data.plotOn(bframe)
    model.plotOn(bframe, RooFit.Normalization(2.e-19, RooAbsReal.Relative))
    bframe.Draw()

    c1.Update()
    a = input('wait...')

if __name__ == '__main__':
    test()

The distribution I can get is

The normalization for xm and dm are the default but for bm I have to scale it by 2.e-19 to get it close to data.

Also thanks for the tips and the work behind them. They helps a lot to make my codes clear.

Dongliang · June 30, 2022, 2:33am

Hi @jonas, as an update, it seems that using Range option in the plotOn function could give a better normalization. Here is a test script:

#!/usr/bin/env python3
from ROOT import gROOT, TFile, TCanvas, RooAbsReal, RooArgSet, RooWorkspace, RooFit

def test():
    w = RooWorkspace('w')
    w.factory('xm[670,650,720]')
    w.factory('dm[900,875,995]')
    w.factory('expr::bm("xm+dm",xm,dm)')
#     w.factory('Gaussian::tPdf(bm,mean[1620,1615,1625],sigma[5,0.1,25])')
#     w.factory('CrystalBall::tPdf(bm,mean[1620,1615,1625],sigma[5.,0.1,10],sigma,aL[3.06,0.1,9],nL[2.43,0.1,9],aR[4.37,0.1,12],nR[8.08,0.1,12])')
    w.factory('Hypatia2::tPdf(bm,l[-4.,-9,-0.6],zeta[0,0,10],fb[0],sigma[40.,0.1,200],mean[1620,1615,1625],aL[100,0.1,200],nL[2.43,0.1,10],aR[100,0.1,200],nR[8.0,0.1,10])')
    w.factory('Chebychev::xPdf(xm, {a1p[0.9,0,1],a2p[0.1,0,0.5]})')
    w.factory('PROD::model(tPdf, xPdf)')
    w['zeta'].setConstant(True)
    w['aL'].setConstant(True)
    w['nL'].setConstant(True)
    w['aR'].setConstant(True)
    w['nR'].setConstant(True)
    xm = w['xm']
    dm = w['dm']
    bm = w['bm']

    tPdf = w.pdf('tPdf')
    model = w.pdf('model')

    obs = RooArgSet(xm,dm)
    data = model.generate(obs, 50000)
    bmv = data.addColumn(bm)

#     w['a1p'].setVal(0.5)
#     w['a2p'].setVal(0.2)
#     w['mean'].setVal(1624)
#     w['sigma'].setVal(9)
#     w['l'].setVal(-1.5)
# 
#     rf = model.fitTo(data,Save=True)
#     rf.Print("v")

    c1 = TCanvas()
    c1.Divide(3)
    c1.cd(1)
    xframe = xm.frame()
    data.plotOn(xframe)
    model.plotOn(xframe)
    xframe.Draw()

    c1.cd(2)
    dframe = dm.frame()
    data.plotOn(dframe)
    model.plotOn(dframe)
    dframe.Draw()

    c1.cd(3)
    bframe = bmv.frame(Bins=100,Range=(1560,1680))
    data.plotOn(bframe)
    model.plotOn(bframe, Range=(1560,1680)) 
    model.plotOn(bframe, Range=(1600,1640), LineColor=2, LineStyle=9) 
    bframe.Draw()

    c1.Update()
    a = input('wait...')

if __name__ == '__main__':
    test()`

and the output figure

system · July 14, 2022, 2:33am

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.