Dear Experts,
I am performing a simultaneous fit in 3 fit categories, to determine the upper limit on two signal branching fractions. Fit categories 1 and 2 contain a background component, whose Yield is expressed as follows:
N_BKG(cat) = Expexted_BKG(cat)/Expexted_BKG_total *N_BKG_total,
where N_BKG_total is a shared parameter between Fit categories 1 and 2.
Furthermore, N_BKG_total is a Gauss constraint.
For the limit calculation I use the AsymptoticCalculator, and to take the Gauss constraint into account I basically followed the discussion from this post:
root-forum.cern.ch/t/upper-limit-calculation/34559
However, since the constrained component is not present in one of the Fit categories, I am unsure if my implementation makes sense:
#Signal Yields
ws.factory(f'expr::N_sig1{label_fitcat}("(BF_sig1/BF_Normalisation)*Normalisation_Yield*(eps_sig1{label_fitcat}/eps_Normalisation)",BF_sig1,BF_Normalisation, Normalisation_Yield,eps_sig1{label_fitcat},eps_Normalisation)')
ws.factory(f'expr::N_sig2{label_fitcat}("(BF_sig2/BF_Normalisation)*Normalisation_Yield*(eps_sig2{label_fitcat}/eps_Normalisation)*(fs_fu)",BF_sig2,BF_Normalisation, Normalisation_Yield,eps_sig2{label_fitcat},eps_Normalisation,fs_fu)')
#Phys BKG Yield
if fit_category_name =="Fit_category_1" or fit_category_name =="Fit_category_2":
Total_expexted_Bkg = 2.4
Expexted_Yield_Bkg= fit_category_dicc["DB_parameters"]["Expexted_Bkg"]
WS(ws,r.RooRealVar(f"Scale_Bkg_{label_fitcat}", f"Scale_Bkg_{label_fitcat}",Expexted_Yield_Bkg/Total_expexted_Bkg))
ws.factory(f'expr::N_Bkg_{label_fitcat}("(Scale_Bkg_{label_fitcat})*N_Bkg_total",Scale_Bkg_{label_fitcat},N_Bkg_total)')
#####################################################################
#Constrains on BKG Yield
ws.factory(f"Gaussian::Constrain_BKG(Bkg_global[2.4,0,30],N_Bkg_total ,sigma_const[1.7])")
ws.var("Bkg_global").setConstant(True)
ws.defineSet("Global_Obs","Bkg_global")
# How about "N_Bkg_total"?
ws.defineSet("NuisanceParameters","Ncomb_Fitcat1,Ncomb_Fitcat2,Ncomb_Fitcat3,exp_coef_Fitcat1,exp_coef_Fitcat2,exp_coef_Fitcat3")
# #Is this right?
ws.factory(f"PROD::PDF_Bkg_Fitcat1const(PDF_Bkg_Fitcat1,Constrain_BKG)")
ws.factory(f"PROD::PDF_Bkg_Fitcat2const(PDF_Bkg_Fitcat2,Constrain_BKG)")
#####################################################################
#Build Model
#Fitcat 1 and 2 contain the BKG:
ws.factory(f"SUM::Model_Fitcat1(N_sig1Fitcat1*PDF_sig1_Fitcat1,"
f"N_sig2Fitcat1*PDF_sig2_Fitcat1,"
f"Ncomb_Fitcat1*PDF_comb_bkg_Fitcat1,"
f"N_Bkg_Fitcat1*PDF_Bkg_Fitcat1const)")
ws.factory(f"SUM::Model_Fitcat2(N_sig1Fitcat2*PDF_sig1_Fitcat2,"
f"N_sig2Fitcat2*PDF_sig2_Fitcat2,"
f"Ncomb_Fitcat2*PDF_comb_bkg_Fitcat2,"
f"N_Bkg_Fitcat2*PDF_Bkg_Fitcat2const)")
# Fitcat3 does not contain the BKG
ws.factory(f"SUM::Model_Fitcat3(N_sig1Fitcat3*PDF_sig1_Fitcat3,"
f"N_sig2Fitcat3*PDF_sig2_Fitcat3,"
f"Ncomb_Fitcat3*PDF_comb_bkg_Fitcat3)")
#####################################################################
# Build RooSimultanous
roo_category = WS(ws,r.RooCategory("category","category"))
roo_category.defineType("Fitcat1")
roo_category.defineType("Fitcat2")
roo_category.defineType("Fitcat3")
dsmap = r.std.map('string, RooDataSet*')()
dsmap["Fitcat1"] = data_set_holder["Fitcat1"]
dsmap["Fitcat2"] = data_set_holder["Fitcat2"]
dsmap["Fitcat3"] = data_set_holder["Fitcat3"]
data_incat = WS(ws,r.RooDataSet("data","data",r.RooArgSet(mass),r.RooFit.Index(roo_category),r.RooFit.Import(dsmap),))
model = WS(ws,r.RooSimultaneous("Model_Final", "Model_Final", roo_category))
model.addPdf(ws.pdf(f"Model_Fitcat1"), "Fitcat1")
model.addPdf(ws.pdf(f"Model_Fitcat2"), "Fitcat2")
model.addPdf(ws.pdf(f"Model_Fitcat3"), "Fitcat3")
#####################################################################
# Fit
args_fit = r.RooLinkedList()
args_fit.Add( r.RooFit.Extended(True))
args_fit.Add( r.RooFit.Constrain(r.RooArgSet(ws.var("N_Bkg_total") )))
args_fit.Add( r.RooFit.Minimizer("Minuit","migrad"))
args_fit.Add( r.RooFit.RecoverFromUndefinedRegions(100))
args_fit.Add( r.RooFit.EvalErrorWall(True))
args_fit.Add( r.RooFit.Offset(True))
args_fit.Add( r.RooFit.Hesse(True))
args_fit.Add( r.RooFit.Save())
args_fit.Add( r.RooFit.Strategy(2))
_fitResults= model.fitTo(data_incat, args_fit)
I am especially insecure if I need to multiply PDF of the BKG with the constraint or the full Model and how this would look in terms of the Likelihood.
Furhtermore, I implemented the upper limit calculation for one of the two signal componets:
#####################################################################
# Calculate Limit for sig1 only
#####################################################################
#Set sig2 to zero
ws.var("BF_sig2").setVal(0)
ws.var("BF_sig2").setConstant(r.kTRUE)
#signal + background mode
sb_model = r.RooStats.ModelConfig("S+B_model",ws)
sb_model.SetPdf(ws.pdf("Model_Final"))
sb_model.SetGlobalObservables(ws.set("Global_Obs"))
sb_model.SetNuisanceParameters(ws.set("NuisanceParameters"))
sb_model.SetParametersOfInterest("BF_sig1")
sb_model.SetObservables(r.RooArgSet(ws.var("B_DTF_M_GeV"), ws.cat("category")))
sb_model.SetSnapshot(r.RooArgSet(ws.var("BF_sig1")))
val_sig1 = ws.var("BF_sig1").getVal()
# bkg ony model
b_model = r.RooStats.ModelConfig("B_model",ws)
b_model.SetPdf(ws.pdf("Model_Final"))
b_model.SetGlobalObservables(ws.set("Global_Obs"))
b_model.SetNuisanceParameters(ws.set("NuisanceParameters"))
ws.var("BF_sig1").setVal(0)
ws.var("BF_sig1").setConstant(r.kTRUE)
b_model.SetParametersOfInterest("BF_sig1")
b_model.SetObservables(r.RooArgSet(ws.var("B_DTF_M_GeV"), ws.cat("category")))
b_model.SetSnapshot(r.RooArgSet(ws.var("BF_sig1")))
ws.var("BF_sig1").setVal(val_sig1)
ws.var("BF_sig1").setConstant(r.kFALSE)
ac = r.RooStats.AsymptoticCalculator(ws.data("data"), b_model, sb_model)
ac.SetOneSided(True)
calc = r.RooStats.HypoTestInverter(ac)
calc.UseCLs(True)
calc.SetConfidenceLevel(0.95)
poimin = 0
poimax =4e-9
npoints=20
calc.SetFixedScan(npoints,poimin,poimax)
results = calc.GetInterval()
upperLimit = results.UpperLimit()
ulError = results.UpperLimitEstimatedError()
expectedLimit = results.GetExpectedUpperLimit(0)
So from what I understood is that for the limit calculation, it is essential to set the Global Observables
and the Nuisance Parameters, to take the constraint into account. Do I also have to include the N_BKG_total in the Nuisance Parameters? And how do I deal with the second signal BF? I assume I have to set it to zero while I am determining the limit for the first BF?
I would appreciate any advice regarding this points.
Thank you,
Seb