I would like to study how well my fit model estimates signal yields. RooMCStudy
seems like the obvious choice, but there does not seem to be a way to extract the signal yield used to generate the toy datasets. Details below.
Introducing a Poisson fluctuation on the number of events using Extended
changes the event weights, but the stored values of the pdf yields do not change, making it impossible to determine the yield assigned to each pdf during dataset generation.
import ROOT as r
ws = r.RooWorkspace("workspace")
# declare model
x = ws.factory("x[-10, 10]")
pdf1 = ws.factory("Gaussian::pdf1(x, m[-1, 1], s1[3,5])")
pdf2 = ws.factory("Gaussian::pdf2(x, m, s2[5,7])")
pdf = ws.factory("SUM::pdf(N1[0, 100] * pdf1, N2[0, 100] * pdf2)")
# add constraint so we can access genParDataSet
s1 = ws.var("s1")
constraint1 = ws.factory("Gaussian::constraint1(s1, cm1[4], cs1[1])")
prodpdf = ws.factory("PROD::prodpdf({pdf,constraint1})")
# declare mcstudy
mcstudy = r.RooMCStudy(
prodpdf,
r.RooArgSet(x),
r.RooFit.FitModel(prodpdf),
r.RooFit.Silence(),
r.RooFit.Binned(), # speed up fits
r.RooFit.Extended(), # Poisson fluctuation on number of events
r.RooFit.Constrain(s1),
r.RooFit.FitOptions(
r.RooFit.Extended(),
r.RooFit.Save(),
r.RooFit.SumW2Error(True),
),
)
# perform 1000 tests
mcstudy.generateAndFit(1000, 0, True)
# the Poisson fluctuation only changes the overall event *weights*
assert mcstudy.genData(0).numEntries() == mcstudy.genData(1).numEntries()
assert mcstudy.genData(0).sumEntries() != mcstudy.genData(1).sumEntries()
# generated pdf1 yields are always the same,
# despite Poisson fluctuation on events
gen_ds = mcstudy.genParDataSet()
assert all(
gen_ds.get(i)["N1_gen"].getVal() == gen_ds.get(0)["N1_gen"].getVal()
for i in range(gen_ds.numEntries())
)
# therefore, the pulls are meaningless
# since they compare the fitted value of N1 to the generated value N1_gen,
# which is not the same as the generated yield for pdf1
# strangely, though N1_gen is fixed to the initial value of N1,
# the stored value of N1 has changed;
# this happens with or without a Poisson fluctuation,
# suggesting the values of N1 and N2 are changing
# without being stored in genParDataSet
assert gen_ds.get(0)["N1_gen"].getVal() == 50
assert ws.var("N1").getVal() != 50