I have a framework which creates a Pdf that is a RooSimultaneous that contains additional RooSimultaneous Pdfs.
In checking that things are working correctly I desired to create a dataset from this Pdf, however when I use the generate command (myPdf->generate(myArgSet, Extended())) the dataset generated seems to correctly use the pdf selected from top-level RooSimultaneous, but sample from all pdfs in the sub-RooSimultaneouses instead of the one corresponding to the current category label.
I have created a simple example of a nested RooSimultaneous structure (code included below) to reproduce the problem I’m having. This example has 2 categories with 2 labels each, and each label pair has a pdf which is a uniform distribution over a single quadrent in the x-y plane to make it easy to see what is being sampled.
In this example I run into a different error, namely that I use the command myRooSimultaneous->generate(myArgSet, nEvents) to generate a data set, and this works fine for a single-layered RooSimultaneous, and creates nEvents events as expected, but when I do the same on a nested RooSimultaneous, I get the error:
[#0] ERROR:Generation -- RooSimGenContext::ctor(simulPdf) ERROR: Need either extended mode or prototype data to calculate number of events per category
[#0] ERROR:Generation -- RooAbsPdf::generate(simulPdf) cannot create a valid context
I had naively viewed a RooSimultaneous as a container that points to the proper contained pdf, so that when I would run the generate command it would call that same command on the relevant pdf, but this doesn’t seem to be correct based on the above, which may also explain my initial problem.
I had also believed that if I had a nested RooSimultaneous it was internally being re-expressed as a single-layered container that used a new category that was created from a combination of the ones used for nesting. But the above error has also called this into question.
My question, then, is how do nested RooSimultaneous work, and how can I generate a data set from on that either properly isolates just one of the contained pdfs (past all of the rooSimultaneous layers) or properly maintains the ratio of expected events from each sub-pdf.
Here is the example code that works as expected for a single RooSimultaneous but gives me problems when using a nested RooSimultaneous:
using namespace RooStats;
using namespace RooFit;
using std::cout;
using std::endl;
int main(){
// workspace
RooWorkspace* w = new RooWorkspace("Workspace");
cout << "workspace created" << endl;
// variables
RooRealVar x("x", "x", -1, 1);
x.setBins(2);
w->import(x);
RooRealVar y("y", "y", -1, 1);
y.setBins(2);
w->import(y);
w->defineSet("vars","x,y");
// categories
RooCategory cat1("cat1", "cat1");
cat1.defineType("cat1Negative", 0);
cat1.defineType("cat1Positive", 1);
w->import(cat1);
RooCategory cat2("cat2", "cat2");
cat2.defineType("cat2Negative", 0);
cat2.defineType("cat2Positive", 1);
w->import(cat2);
w->defineSet("VarsAndCats","x,y,cat1,cat2");
cout << "variables set" << endl;
//-------------- Create Pdfs ------------------
// create negative x negative y pdf
RooDataSet xNegYNegDS("xNegYNegDS", "xNegYNegDS", *w->set("vars"));
w->var("x")->setVal(-0.5);
w->var("y")->setVal(-0.5);
for(int i = 0; i < 1; i++){xNegYNegDS.add(*w->set("vars"));}
RooDataHist xNegYNegDH("xNegYNegDH", "xNegYNegDH", *w->set("vars"),
xNegYNegDS);
w->import(xNegYNegDH);
RooHistPdf xNegYNegPdf("xNegYNegPdf", "xNegYNegPdf", *w->set("vars"),
xNegYNegDH);
w->import(xNegYNegPdf);
// create negative x positive y pdf
RooDataSet xNegYPosDS("xNegYPosDS", "xNegYPosDS", *w->set("vars"));
w->var("x")->setVal(-0.5);
w->var("y")->setVal(0.5);
for(int i = 0; i < 2; i++){xNegYPosDS.add(*w->set("vars"));}
RooDataHist xNegYPosDH("xNegYPosDH", "xNegYPosDH", *w->set("vars"),
xNegYPosDS);
w->import(xNegYPosDH);
RooHistPdf xNegYPosPdf("xNegYPosPdf", "xNegYPosPdf", *w->set("vars"),
xNegYPosDH);
w->import(xNegYPosPdf);
// create positive x negative y pdf
RooDataSet xPosYNegDS("xPosYNegDS", "xPosYNegDS", *w->set("vars"));
w->var("x")->setVal(0.5);
w->var("y")->setVal(-0.5);
for(int i = 0; i < 3; i++){xPosYNegDS.add(*w->set("vars"));}
RooDataHist xPosYNegDH("xPosYNegDH", "xPosYNegDH", *w->set("vars"),
xPosYNegDS);
w->import(xPosYNegDH);
RooHistPdf xPosYNegPdf("xPosYNegPdf", "xPosYNegPdf", *w->set("vars"),
xPosYNegDH);
w->import(xPosYNegPdf);
// create positive x positive y pdf
RooDataSet xPosYPosDS("xPosYPosDS", "xPosYPosDS", *w->set("vars"));
w->var("x")->setVal(0.5);
w->var("y")->setVal(0.5);
for(int i = 0; i < 4; i++){xPosYPosDS.add(*w->set("vars"));}
RooDataHist xPosYPosDH("xPosYPosDH", "xPosYPosDH", *w->set("vars"),
xPosYPosDS);
w->import(xPosYPosDH);
RooHistPdf xPosYPosPdf("xPosYPosPdf", "xPosYPosPdf", *w->set("vars"),
xPosYPosDH);
w->import(xPosYPosPdf);
cout << "sub pdfs created" << endl;
//--------------- Combine Pdfs into RooSimultaneous ----------------
// create RooSimultaneous
RooSimultaneous simulXNegPdf("simulXNegPdf", "simulXNegPdf",
*w->cat("cat2"));
simulXNegPdf.addPdf(*w->pdf("xNegYNegPdf"), "cat2Negative");
simulXNegPdf.addPdf(*w->pdf("xNegYPosPdf"), "cat2Positive");
w->import(simulXNegPdf);
RooSimultaneous simulXPosPdf("simulXPosPdf", "simulXPosPdf",
*w->cat("cat2"));
simulXPosPdf.addPdf(*w->pdf("xPosYNegPdf"), "cat2Negative");
simulXPosPdf.addPdf(*w->pdf("xPosYPosPdf"), "cat2Positive");
w->import(simulXPosPdf);
//use factory cmd for the second because can't add roopdf to roopdf
w->factory("SIMUL::simulPdf(cat1, cat1Negative=simulXNegPdf, cat1Positive=simulXPosPdf)");
// check that the evaluation works
w->cat("cat1")->setLabel("cat1Negative");
w->cat("cat2")->setLabel("cat2Negative");
for(int i = 0; i < 10; i++){
w->var("x")->setVal(.2*i - 1);
w->var("y")->setVal((.15*i -1));
cout << "cat1 negative, cat2 negative: x=" << w->var("x")->getVal()
<< " y=" << w->var("y")->getVal()
<< ": "
<< w->pdf("simulPdf")->getVal() << endl;
}
//------------------ Generate Events --------------------------
// single simul first
w->cat("cat1")->setLabel("cat1Negative");
w->cat("cat2")->setLabel("cat2Negative");
RooDataSet* cat1NegDS =
w->pdf("simulXNegPdf")->generate(*w->set("vars"), 5);
// look at the events
for(int i = 0; i < cat1NegDS->numEntries(); i++){
const RooArgSet* tmpSet = cat1NegDS->get(i);
cout << "generated x: " << tmpSet->getRealValue("x") << " "
<< "generated y: " << tmpSet->getRealValue("y") << endl;
}
// try positive y
w->cat("cat2")->setLabel("cat2Positive");
RooDataSet* cat1PosDS =
w->pdf("simulXNegPdf")->generate(*w->set("vars"), 5);
// look at the events
for(int i = 0; i < cat1PosDS->numEntries(); i++){
const RooArgSet* tmpSet = cat1PosDS->get(i);
cout << "generated x: " << tmpSet->getRealValue("x") << " "
<< "generated y: " << tmpSet->getRealValue("y") << endl;
}
// try nested simul
w->cat("cat1")->setLabel("cat1Negative");
w->cat("cat2")->setLabel("cat2Negative");
RooDataSet* genNegDS = w->pdf("simulPdf")->generate(*w->set("vars"), 10);
// look at events
for(int i = 0; i < genNegDS->numEntries(); i++){
const RooArgSet* tmpSet = genNegDS->get(i);
cout << "generated x: " << tmpSet->getRealValue("x")
<< endl;
cout << "generated y: " << tmpSet->getRealValue("y")
<< endl;
}
}
I appreciate any input.
Thanks,
Shaun