If I run fits with SumW2
errors, I get strange pulls for some distributions. If I run with default errors or with AsymptoticError
, I recover the expected pulls (standard deviation = 1).
Reproducer:
# %%
import ROOT as r
from uncertainties import correlated_values_norm
# %%
ws = r.RooWorkspace("workspace")
x = ws.factory("x[0, 10]")
signal = ws.factory("Gaussian::signal(x, mean[3, 0, 10], sigma[2, 1, 10])")
background = ws.factory("Exponential::background(x, tau[-0.1, -10, 0])")
N_sig_m = ws.factory("N_sig_m[100, 0, 1000]")
N_bkg_m = ws.factory("N_bkg_m[100, 0, 1000]")
N_sig_p = ws.factory("N_sig_p[100, 0, 1000]")
N_bkg_p = ws.factory("N_bkg_p[100, 0, 1000]")
cat = r.RooCategory("charge", "charge", {"m": -1, "p": 1})
ws.Import(cat)
shpdict = {}
for c in dict(cat.states()).keys():
shp = ws.factory(f"SUM::model_{c}(N_sig_{c}*signal, N_bkg_{c}*background)")
shpdict[c] = shp.GetName()
catstr = ", ".join([f"{c}={s}" for c, s in shpdict.items()])
model = ws.factory(f"SIMUL::model({cat.GetName()}, {catstr})")
# %%
internal_constraints = {
ws.factory(
f"Poisson::{nm}_{c}_constraint({nm}_{c}, {ws.var(f'{nm}_{c}').getVal()})"
)
for nm in ("N_sig", "N_bkg")
for c in dict(cat.states()).keys()
}
prodpdf = ws.factory(f"PROD::prodpdf(model, {','.join([c.GetName() for c in internal_constraints])})")
# %%
def do_mcstudy(errors: "str"):
# internal_constraints needed, otherwise genParsDataSet won't be saved
# https://github.com/root-project/root/issues/9490
FitOptions = [
r.RooFit.Extended(),
r.RooFit.Save(),
r.RooFit.Strategy(1),
]
if errors == "SumW2":
FitOptions.append(r.RooFit.SumW2Error(True))
elif errors == "Asymptotic":
FitOptions.append(r.RooFit.AsymptoticError(True))
elif errors == "":
pass
else:
raise ValueError(f"Unknown error type: {errors}")
mcstudy = r.RooMCStudy(
prodpdf,
{x, cat},
FitModel=model,
Silence=True,
Binned=True,
Constrain={N_sig_m, N_bkg_m, N_sig_p, N_bkg_p},
FitOptions=FitOptions,
)
mcstudy.generateAndFit(1_000, 0, True)
return mcstudy
# %%
studies = {}
for err in ("", "SumW2", "Asymptotic"):
studies[err] = do_mcstudy(err)
# %%
def asym(m, p):
return (m - p) / (m + p)
# %%
def make_hist(mcstudy, nickname):
h_pul = r.TH1F(f"h_pul_{nickname}", f"pulls for {nickname}", 100, -5, 5)
gen_ds = mcstudy.genParDataSet()
ifit = 0
for igen in range(gen_ds.numEntries()):
fitres = mcstudy.fitResult(igen)
if fitres.status() != 0:
continue
genpars = gen_ds.get(igen)
fitpars = mcstudy.fitParams(ifit)
gen_a = asym(genpars["N_sig_m_gen"].getVal(), genpars["N_sig_p_gen"].getVal())
corr = fitres.correlation(fitpars[f"N_sig_m"], fitpars[f"N_sig_p"])
fit_m, fit_p = correlated_values_norm(
[
(fitpars[f"N_sig_m"].getVal(), fitpars[f"N_sig_m"].getError()),
(fitpars[f"N_sig_p"].getVal(), fitpars[f"N_sig_p"].getError()),
],
[[1, corr], [corr, 1]],
)
fit_a = asym(fit_m, fit_p)
residual = fit_a - gen_a
h_pul.Fill(residual.n / residual.s)
ifit += 1
return h_pul
# %%
c = r.TCanvas()
c.DivideSquare(len(studies))
h = {}
for i, (err, mcstudy) in enumerate(studies.items(), 1):
c.cd(i)
h[err] = hist = make_hist(mcstudy, err)
hist.Draw()
print(err)
print(f" mean: {hist.GetMean():.2f} +/- {hist.GetMeanError():.2f}")
print(f" std. dev.: {hist.GetStdDev():.2f} +/- {hist.GetStdDevError():.2f}")
c.Draw()
# %%
c.SaveAs("test.pdf")
Relevant output:
mean: 0.02 +/- 0.03
std. dev.: 1.03 +/- 0.02
SumW2
mean: -0.01 +/- 0.02
std. dev.: 0.68 +/- 0.02
Asymptotic
mean: 0.08 +/- 0.03
std. dev.: 1.00 +/- 0.02
test.pdf (18.1 KB)