Hi Panagiotis,
If you have already researched papers and tutorials on CLs, you might have already encountered the following paper:
Following this paper you can find the p-value for any possible limit \mu from formulas (59) or (67).
To find upper limits at a given p-value, you can then calculate this p-value for "every" \mu and scan for one that fits your threshold.
Alternatively the upper limits can be computed from (61) and (69) depending on your test statistic.
I have written the following code that implements the test statistic \tilde{q}_{\mu} to exclude the signal (the hypothesis \mu = 1).
By changing the test_mu
variable in lines 106 and 107 you can vary the \mu to be excluded.
The code is not written completely without RooFit and RooStats, but it computes many quantities without relying on them.
The workspace containing my data and pdf is read from a json file into ROOT (this only works on the master branch as of now and not yet on the stable releases).
If you already have your likelihoods and test statistic without using RooFit, you should be able to reuse some parts of my code that don't require RooFit.
I hope you can get all your answers from my explanation and the code. Otherwise feel free to ask further questions.
Cheers
Daniel
The PyROOT Code
def read_workspace(config):
    """Load a RooWorkspace from the JSON file at *config* and return it."""
    import ROOT

    # An empty workspace that the JSON content will be imported into.
    workspace = ROOT.RooWorkspace("workspace")
    # The RooJSONFactoryWSTool handles importing from / exporting to JSON.
    ROOT.RooJSONFactoryWSTool(workspace).importJSON(config)
    return workspace
def get_nll_mu(pdf, data, poi, poi_value):
    """
    Fit *pdf* to *data* and return ``(nll_value, mu_hat)``.

    The parameter of interest *poi* is either left floating and optimized to
    its best estimate (when *poi_value* is None), or fixed to *poi_value* for
    a conditional fit.  All modified parameter states (values and constness)
    are restored before returning, so repeated calls do not affect each other.
    """
    import ROOT

    params = pdf.getParameters(data)
    init_params = params.snapshot()  # remember all parameter values for restoration
    was_constant = poi.isConstant()
    old_value = poi.getVal()
    if poi_value is not None:
        # Conditional fit: pin the POI at the requested value.
        poi.setConstant(True)
        poi.setVal(poi_value)
    # Negative log likelihood of observing `data` under `pdf`.
    nll = pdf.createNLL(data)
    # A minimization is only needed if at least one parameter floats.
    has_floating_params = any(not param.isConstant() for param in params)
    if not has_floating_params:
        val = nll.getVal()  # the likelihood at the fixed parameter point
        mu_hat = poi_value  # no fit was performed, so mu_hat is the fixed value
        poi.setConstant(was_constant)  # reset the POI state so that future
        poi.setVal(old_value)          # fits are not affected
        return val, mu_hat
    minim = ROOT.RooMinimizer(nll)  # perform a fit of the NLL
    minim.setPrintLevel(-1)
    minim.minimize("Minuit", "")    # the minimizer "Minuit" is used here
    val = nll.getVal()     # NLL value at the best-fit parameters
    mu_hat = poi.getVal()  # best-fit value of the POI
    # Restore all parameter states for subsequent calls.
    poi.setConstant(was_constant)
    poi.setVal(old_value)
    params.assign(init_params)
    return val, mu_hat
def get_q_tilde(pdf, data, mu):
    """
    Compute the test statistic q_mu_tilde, defined by formula (16) of the
    paper https://arxiv.org/pdf/1007.1727.pdf.
    """
    # Likelihood from an unconstrained fit, and with the POI fixed to mu.
    nll_free, mu_hat = get_nll_mu(pdf, data, mu, None)
    nll_fixed, _ = get_nll_mu(pdf, data, mu, mu.getVal())
    # Following (16), q_tilde is set to 0 when the best-fit strength exceeds
    # the tested mu (upward fluctuations do not count as evidence against mu).
    if mu_hat > mu.getVal():
        return 0
    return 2 * (nll_fixed - nll_free)
def get_F(q_tilde, test_mu, sigma, mu_prime):
    """
    Cumulative distribution function of q_tilde, formula (65) of the paper
    https://arxiv.org/pdf/1007.1727.pdf.  This in turn is needed to compute
    the exclusion limits.  *sigma* describes the width of the distribution of
    q_tilde and is found from equation (29); *mu_prime* is the assumed true
    signal strength.
    """
    from scipy.stats import norm

    # norm.cdf is Phi, the cumulative density of the standard normal.
    if q_tilde <= test_mu**2 / sigma**2:
        argument = q_tilde ** (0.5) - (test_mu - mu_prime) / sigma
    else:
        numerator = q_tilde - (test_mu**2 - 2 * test_mu * mu_prime) / sigma**2
        argument = numerator / (2 * test_mu / sigma)
    return norm.cdf(argument)
def get_CLs(pdf, data, mu, sigma):
    """
    Get the CLs value, which is p_{s+b} / (1 - p_b).
    """
    q_tilde = get_q_tilde(pdf, data, mu)
    # p-values for the signal-plus-background and background-only hypotheses.
    # In both tests the null hypothesis to be rejected is mu = 1.
    p_sb = 1.0 - get_F(q_tilde, 1.0, sigma, 1.0)  # s+b: the true strength is 1
    p_b = 1.0 - get_F(q_tilde, 1.0, sigma, 0.0)   # b-only: the true strength is 0
    return p_sb / p_b
def post_fit_asimov(observables, pdf, data, poi, poi_value):
    """
    Generate a post-fit Asimov dataset.

    The POI is fixed to *poi_value*, the remaining parameters are fitted to
    *data*, and the Asimov dataset is generated at that post-fit point.  All
    parameter states are restored before returning.
    """
    from ROOT.RooStats import AsymptoticCalculator

    params = pdf.getParameters(data)
    snapshot = params.snapshot()
    poi_was_constant = poi.isConstant()
    poi_old_value = poi.getVal()
    # Conditional fit with the POI pinned, then generate the Asimov data.
    poi.setConstant(True)
    poi.setVal(poi_value)
    pdf.fitTo(data, PrintLevel=-1)
    asimov_data = AsymptoticCalculator.GenerateAsimovData(pdf, observables)
    # Undo all modifications so later fits start from the original state.
    poi.setConstant(poi_was_constant)
    poi.setVal(poi_old_value)
    params.assign(snapshot)
    return asimov_data
def hypotest(obs_data, pdf, poi, observables):
    """
    Find the Asimov data and perform the hypothesis test itself, returning
    the CLs values for the observed data and the expected (Asimov) data.
    """
    # Generate the Asimov data from the given pdf.  A fit with fixed mu = 0 is
    # performed first to find best-fit values for the other parameters; without
    # that fit one would speak of nominal Asimov data instead.
    asimov_data = post_fit_asimov(observables, pdf, obs_data, poi, 0)
    # The test statistic for the Asimov dataset under the null hypothesis
    # yields the width of the distributions according to (29).
    q_tilde_asimov = get_q_tilde(pdf, asimov_data, poi)
    sigma = (poi.getVal() ** 2 / q_tilde_asimov) ** 0.5
    # Confidence levels for excluding mu = 1, from observed and Asimov data.
    cls_observed = get_CLs(pdf, obs_data, poi, sigma)
    cls_expected = get_CLs(pdf, asimov_data, poi, sigma)
    return cls_observed, cls_expected
def main():
    """Run the CLs hypothesis test on the workspace stored in myWorkspace.json."""
    ws = read_workspace("myWorkspace.json")
    model_config = ws["ModelConfig"]
    pdf = model_config.GetPdf()
    # The first (and only) parameter of interest is the signal strength mu.
    mu = model_config.GetParametersOfInterest()[0]
    mu.setRange(0, 10)
    observables = model_config.GetObservables()
    obs_data = ws.data("obsData")
    cls_obs, cls_exp = hypotest(obs_data, pdf, mu, observables)
    print(f"Observed: {cls_obs:.8f}, Expected: {cls_exp:.8f}")
if __name__ == "__main__":
    import ROOT

    # As long as this object lives, RooFit output below WARNING is silenced.
    _silence_roofit = ROOT.RooHelpers.LocalChangeMsgLevel(ROOT.RooFit.WARNING)
    main()
The JSON file containing my workspace
{
"distributions":[
{
"name":"channel_1",
"type":"histfactory_dist",
"axes":[
{
"name":"obs_x_channel_1",
"max":3,
"min":0,
"nbins":3
}
],
"samples":[
{
"name":"signal",
"data":{
"contents":[6.369616873214543,2.697867137638703,0.4097352393619469]
},
"modifiers":[
{
"name":"mu",
"type":"normfactor",
"parameter":"mu"
}
]
},
{
"name":"background",
"data":{
"contents":[0.16527635528529094,8.132702392002724,9.127555772777217],
"errors":[0.5125680914002991,3.3919644089992107,2.5959617012357628]
},
"modifiers":[
{
"name":"bkg_uncert",
"type":"staterror",
"constraint":"Poisson"
}
]
}
]
}
],
"data":[
{
"name":"obsData_channel_1",
"type":"binned",
"axes":[
{
"name":"obs_x_channel_1",
"max":3,
"min":0,
"nbins":3
}
],
"contents":[1,2,7]
}
],
"likelihoods":[
{
"name":"simPdf_obsData",
"distributions":["channel_1"],
"data":["obsData_channel_1"]
}
],
"analyses":[
{
"name":"simPdf_obsData",
"likelihood":"simPdf_obsData",
"parameters_of_interest":["mu"]
}
],
"misc":{
"ROOT_internal":{
"ModelConfigs":{
"simPdf_obsData":{
"mcName":"ModelConfig",
"pdfName":"simPdf"
}
},
"combined_datas":{
"obsData":{
"index_cat":"channelCat",
"indices":[0],
"labels":["channel_1"]
}
},
"combined_distributions":{
"simPdf":{
"distributions":["channel_1"],
"index_cat":"channelCat",
"indices":[0],
"labels":["channel_1"]
}
}
}
}
}