Pulls from RooMCStudy with different generating and fitting models

There are some instances where you would want to have RooFit’s RooMCStudy generate your accept-reject MC with a different model then the fitting model with RooMCStudy::generateAndFit(). For example, you might have a histogram that can act as a template for your background (the gen model) and you want to fit it with a parametric function that you think might model your background (the fit model) and make a pull plot of the number of events to see how well this does (and then repeat with signal injection and spurious signal tests when you add signal).

For RooMCStudy::plotPull() to be able to make a pull plot for a particular variable in your study that variable needs to appear as a parameter in both the gen model and the fit model (so that the gen model can produce the true value and the fit model can produce the meas value for what will eventually get done under the hood in RooPullVar::evaluate()). This makes sense. However, it seems(?) that the variable that appears in both models then also needs to have the same name, or else a segfault will occur:

[#0] WARNING:InputArguments -- RooDataSet::fitParData_fit_model_gen_model:fillHistogram: WARNING: data does not contain variable: n_backgroundpull

 *** Break *** segmentation violation

Is there a better way to do this then creating essentially a duplicate variable so that both models have their own parameter? Something like

    # Variables must have the same name for the pull to be created
    n_background = ROOT.RooRealVar('n_background', 'Number of background events',
                                   expected_number_events,
                                   0.4 * expected_number_events,
                                   1e2 * expected_number_events)
    n_background_fit = ROOT.RooRealVar('n_background', 'Number of background events',
                                       expected_number_events,
                                       0.4 * expected_number_events,
                                       1e2 * expected_number_events)

    # Extend the pdfs so have a quantity for pull plot
    gen_model = ROOT.RooExtendPdf('gen_model', 'background model',
                                  template_model, n_background)
    fit_model = ROOT.RooExtendPdf('fit_model', 'background fit model',
                                  param_model, n_background_fit)

This works, but I don’t know if this is the “correct” way or if there is a better approach. As a working example here is the PyROOT code to do an example of the above. RooMCStudy_fit_model.py It is also copied below.

Note that my example is somewhat dumb, as the “background” is a histogram generated from a Gaussian and the fit model is a different Gaussian, and so it can be super trivially fit meaning that the resulting pull plot is super sharply peaked ---- not a nice Gaussian around a pull mean of 0. I just want to know if this is the best approach to getting a pull plot when using a different gen and fit model

import ROOT
from ROOT import gROOT

from ROOT import RooFit
from ROOT import RooArgList, RooArgSet


def n_bins(range, bin_width):
    return int((range[1] - range[0]) / bin_width)


def generate_hist(n_samples=1000, x_range=None, bin_width=None):
    if x_range is None:
        x_range = [65, 115]

    if bin_width is None:
        bin_width = 5

    hist = ROOT.TH1F('hist', '', n_bins(x_range, bin_width),
                     x_range[0], x_range[1])

    model = ROOT.TF1('model', '[0]*exp(-0.5*((x-[1])/[2])**2)',
                     x_range[0], x_range[1])
    model.SetParameter(1, 80)
    model.SetParameter(2, 13)
    model.SetParameter(0, ROOT.TMath.Sqrt(
        2 * ROOT.TMath.Pi()) * model.GetParameter(2))

    hist.FillRandom('model', n_samples)

    return hist


def draw_RooPlot(observable, data, model, name, path=None, plot_title=None):
    frame = observable.frame()
    if plot_title is None:
        plot_title = ''
    frame.SetTitle(plot_title)
    data.plotOn(frame)
    model.plotOn(frame)

    canvas = ROOT.TCanvas()
    canvas.cd()
    frame.Draw()

    legend = ROOT.TLegend(0.5, 0.7, 0.89, 0.89)
    legend.SetBorderSize(0)  # no border
    legend.SetFillStyle(0)  # make transparent
    legend.Draw()

    canvas.Draw()
    if path is None:
        path = ''
    canvas.SaveAs(path + name + '.png')
    canvas.SaveAs(path + name + '.pdf')
    canvas.SaveAs(path + name + '.eps')


def quick_save(plot, name, path=None, plot_title=None):
    if plot_title is None:
        plot_title = ''
    plot.SetTitle(plot_title)

    canvas = ROOT.TCanvas()
    canvas.cd()
    plot.Draw()
    canvas.Draw()

    if path is None:
        path = ''
    canvas.SaveAs(path + name + '.png')
    canvas.SaveAs(path + name + '.pdf')
    canvas.SaveAs(path + name + '.eps')


if __name__ == '__main__':
    gROOT.SetBatch(ROOT.kTRUE)

    x_range = [60, 115]

    x = ROOT.RooRealVar('x', 'Mass', x_range[0], x_range[1], 'GeV')
    # Set binning to be 5 GeV
    x.setBins(n_bins(x_range, 5))

    input_hist = generate_hist(x_range=x_range)

    datahist = ROOT.RooDataHist('MC_QCD_0tag', '',
                                RooArgList(x), input_hist)
    template_model = ROOT.RooHistPdf('template_model', 'background hist model',
                                     RooArgSet(x), datahist)
    draw_RooPlot(x, datahist, template_model,
                 'background_histpdf',
                 plot_title='background model')

    mean = ROOT.RooRealVar('mean', 'mean', 90, 70, 100, 'GeV')
    width = ROOT.RooRealVar('width', 'width', 10, 1, 50, 'GeV')
    param_model = ROOT.RooGaussian('param_model', 'background parameterized model',
                                   x, mean, width)

    expected_number_events = input_hist.Integral()

    # Variables must have the same name for the pull to be created
    n_background = ROOT.RooRealVar('n_background', 'Number of background events',
                                   expected_number_events,
                                   0.4 * expected_number_events,
                                   1e2 * expected_number_events)
    n_background_fit = ROOT.RooRealVar('n_background', 'Number of background events',
                                       expected_number_events,
                                       0.4 * expected_number_events,
                                       1e2 * expected_number_events)

    # Extend the pdfs so have a quantity for pull plot
    gen_model = ROOT.RooExtendPdf('gen_model', 'background model',
                                  template_model, n_background)
    fit_model = ROOT.RooExtendPdf('fit_model', 'background fit model',
                                  param_model, n_background_fit)

    ROOT.RooRandom.randomGenerator().SetSeed(0)
    n_toys = 1000

    mc_study = ROOT.RooMCStudy(gen_model, RooArgSet(x),
                               RooFit.FitModel(fit_model),
                               RooFit.FitOptions('sh'),
                               RooFit.Binned(ROOT.kTRUE),
                               RooFit.Silence(ROOT.kTRUE))
    ROOT.SetOwnership(mc_study, ROOT.kFALSE)

    mc_study.generateAndFit(n_toys,
                            int(expected_number_events), ROOT.kTRUE)

    pull_background_yield = mc_study.plotPull(n_background,
                                              RooFit.FitGauss(ROOT.kTRUE),
                                              RooFit.FrameRange(-4., 4.))

    quick_save(pull_background_yield, 'background_yield_pull',
               plot_title='Pull of background extraction')

    frame = x.frame()
    mc_study.genData(0).plotOn(frame,
                               RooFit.LineColor(ROOT.kBlack))
    fit_model.plotOn(frame,
                     RooFit.LineColor(ROOT.kBlue))
    canvas = ROOT.TCanvas()
    canvas.cd()
    frame.Draw()
    canvas.Draw()
    canvas.SaveAs('fit.pdf')

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.