I have a pdf that is the product of a pdf in the space of observables and a constraints pdf on a set of nuisance parameters.
RooProdPdf model("model", "model", combinedRunPdf, constraintsPdf);
Assuming the nuisance parameters are kept constant, generating a data set in the space of observables from either the full model model
or the observable component combinedRunPdf
should yield the same result. I do this via
int seed = 1;
RooRandom::randomGenerator()->SetSeed(seed);
// generate a data set from the observable part of the model,
// just default nuis values for now
RooDataSet* genData = combinedRunPdf.generate(obsCats, AutoBinned(false),
Extended(true), Name("genData"));
RooRandom::randomGenerator()->SetSeed(seed);
// generate a data set from the model,
// just default nuis values for now
RooDataSet* genData2 = model.generate(obsCats, AutoBinned(false),
Extended(true), Name("genData2"));
And indeed I find that both generation commands creates the exact same events (and number of events, though interestingly in a different order).
However, when I create a negative log likelihood, the first method fails spectacularly with the error
[#0] ERROR:Eval -- RooAbsReal::logEvalError(model) evaluation error,
origin : RooProdPdf::model[ crPdf * constraintsPdf ]
message : getLogVal() top-level p.d.f evaluates to zero
server values: !pdfs=(crPdf = 0/1,constraintsPdf = 1)
...
these go on for a while...
...
[#0] ERROR:Eval -- RooAbsReal::logEvalError(nll_model_genData) evaluation error,
origin : RooNLLVar::nll_model_genData[ paramSet=(nBkgType0,nBkgType1) ]
message : function value is NAN
server values: paramSet=(nBkgType0 = 10,nBkgType1 = 5)
[#0] ERROR:Eval -- RooAbsReal::logEvalError(nll_model_genData_with_constr) evaluation error,
origin : RooAddition::nll_model_genData_with_constr[ nll_model_genData + nll_model_genData_constr ]
message : function value is NAN
server values: !set=(nll_model_genData = -nan,nll_model_genData_constr = 3.62298)
and yields a value of -nan.
The second dataset, however, despite being identical point for point, runs without incident and yields a sensible value of 7.24813.
Is there something under the hood of a RooDataSet that would cause this behavior? It is interesting to note that the ds that works is the one generated from the same model that it is being compared to, but this should not matter since both contain identical argsets and points.
I’d appreciate any help trying to figure out what’s going on here.
Thanks!
Shaun
The code for generating this example is provided below for reference. Note that the category “run” is intentionally somewhat degenerate with the variable “t” so that strange behavior will cause an error instead of just strange results.
int main(){
// variables
RooRealVar x("x", "x", 0, 10);
RooRealVar t("t", "time", 0, 4);
t.setBins(8);
RooArgSet time(t);
RooArgSet obs(x, t);
RooCategory run("run", "run");
run.defineType("run0", 0);
run.defineType("run1", 1);
RooArgSet cats(run);
RooArgSet obsCats(obs, cats);
cout << "made vars" << endl;
// nuisance parameters
RooRealVar nBkgType0("nBkgType0", "nBkgType0", 10, 0, 30);
RooRealVar nBkgType1("nBkgType1", "nBkgType1", 5, 0, 30);
// nuisance parameter constraints
RooConstVar nBkgType0Mean("nBkgType0Mean", "nBkgType0Mean", 10);
RooConstVar nBkgType0Std("nBkgType0Std", "nBkgType0Std", 3);
RooGaussian nBkgType0Prior("nBkgType0Prior", "nBkgType0Prior", nBkgType0,
nBkgType0Mean, nBkgType0Std);
RooConstVar nBkgType1Mean("nBkgType1Mean", "nBkgType1Mean", 5);
RooConstVar nBkgType1Std("nBkgType1Std", "nBkgType1Std", 2);
RooGaussian nBkgType1Prior("nBkgType1Prior", "nBkgType1Prior", nBkgType1,
nBkgType1Mean, nBkgType1Std);
RooProdPdf constraintsPdf("constraintsPdf", "constraintsPdf",
nBkgType0Prior, nBkgType1Prior);
cout << "made constraints" << endl;
// model
// run0 from time 0 to time 1
RooConstVar run0Exposure("run0Exposure", "run0Exposure", 1);
RooDataSet run0TimeDS("r0tds", "r0tds", time);
t.setVal(0.25); run0TimeDS.add(time);
t.setVal(0.75); run0TimeDS.add(time);
RooDataHist run0TimeDH("r0tdh", "r0tdh", time, run0TimeDS);
RooHistPdf run0TimePdf("run0TimePdf", "run0TimePdf", time, run0TimeDH);
RooConstVar bkgType0Run0Mean("bkgType0Run0Mean", "bkgType0Run0Mean", 3);
RooConstVar bkgType0Run0Std("bkgType0Run0Std", "bkgType0Run0Std", 1);
RooGaussian bkgType0Run0Pdf("bkgType0Run0Pdf", "bkgType0Run0Pdf",
x, bkgType0Run0Mean, bkgType0Run0Std);
RooProdPdf bkgType0Run0FullPdf("fullType0Run0", "fullType0Run0",
run0TimePdf, bkgType0Run0Pdf);
RooFormulaVar nType0Run0("nType0Run0", "nType0Run0",
"@0*@1", RooArgList(run0Exposure, nBkgType0));
RooConstVar bkgType1Run0Mean("bkgType1Run0Mean", "bkgType1Run0Mean", 6);
RooConstVar bkgType1Run0Std("bkgType1Run0Std", "bkgType1Run0Std", 1);
RooGaussian bkgType1Run0Pdf("bkgType1Run0Pdf", "bkgType1Run0Pdf",
x, bkgType1Run0Mean, bkgType1Run0Std);
RooProdPdf bkgType1Run0FullPdf("fullType1Run0", "fullType1Run0",
run0TimePdf, bkgType1Run0Pdf);
RooFormulaVar nType1Run0("nType1Run0", "nType1Run0",
"@0*@1", RooArgList(run0Exposure, nBkgType1));
RooAddPdf run0Pdf("run0Pdf", "run0Pdf",
RooArgList(bkgType0Run0FullPdf, bkgType1Run0FullPdf),
RooArgList(nType0Run0, nType1Run0));
// run1 from time 2.5 to 4
RooConstVar run1Exposure("run1Exposure", "run1Exposure", 1.5);
RooDataSet run1TimeDS("run1tds", "run1tds", time);
t.setVal(2.75); run1TimeDS.add(time);
t.setVal(3.25); run1TimeDS.add(time);
t.setVal(3.75); run1TimeDS.add(time);
RooDataHist run1TimeDH("run1tdh", "run1tdh", time, run1TimeDS);
RooHistPdf run1TimePdf("run1TimePdf", "run1TimePdf", time, run1TimeDH);
RooConstVar bkgType0Run1Mean("bkgType0Run1Mean", "bkgType0Run1Mean", 3.25);
RooConstVar bkgType0Run1Std("bkgType0Run1Std", "bkgType0Run1Std", 1);
RooGaussian bkgType0Run1Pdf("bkgType0Run1Pdf", "bkgType0Run1Pdf",
x, bkgType0Run1Mean, bkgType0Run1Std);
RooProdPdf bkgType0Run1FullPdf("fullType0Run1", "fullType0Run1",
run1TimePdf, bkgType0Run1Pdf);
RooFormulaVar nType0Run1("nType0Run1", "nType0Run1",
"@0*@1", RooArgList(run1Exposure, nBkgType0));
RooConstVar bkgType1Run1Mean("bkgType1Run1Mean", "bkgType1Run1Mean", 5.75);
RooConstVar bkgType1Run1Std("bkgType1Run1Std", "bkgType1Run1Std", 1);
RooGaussian bkgType1Run1Pdf("bkgType1Run1Pdf", "bkgType1Run1Pdf",
x, bkgType1Run1Mean, bkgType1Run1Std);
RooProdPdf bkgType1Run1FullPdf("fullType1Run1", "fullType1Run1",
run1TimePdf, bkgType1Run1Pdf);
RooFormulaVar nType1Run1("nType1Run1", "nType1Run1",
"@0*@1", RooArgList(run1Exposure, nBkgType1));
RooAddPdf run1Pdf("run1Pdf", "run1Pdf",
RooArgList(bkgType0Run1FullPdf, bkgType1Run1FullPdf),
RooArgList(nType0Run1, nType1Run1));
RooSimultaneous combinedRunPdf("crPdf", "crPdf", run);
combinedRunPdf.addPdf(run0Pdf, "run0");
combinedRunPdf.addPdf(run1Pdf, "run1");
RooProdPdf model("model", "model", combinedRunPdf, constraintsPdf);
cout << "made model" << endl;
int seed = 1;
RooRandom::randomGenerator()->SetSeed(seed);
// generate a data set from the observable part of the model,
// just default nuis values for now
RooDataSet* genData = combinedRunPdf.generate(obsCats, AutoBinned(false),
Extended(true), Name("genData"));
RooRandom::randomGenerator()->SetSeed(seed);
// generate a data set from the model,
// just default nuis values for now
RooDataSet* genData2 = model.generate(obsCats, AutoBinned(false),
Extended(true), Name("genData2"));
cout << combinedRunPdf.expectedEvents(obsCats) << endl;
cout << model.expectedEvents(obsCats) << endl;
// look at method 1 data
vector<Double_t> m1xs;
vector<Double_t> m1ts;
cout << "method 1 generated " << genData->numEntries() << " events"
<< endl;
for(int aa = 0; aa < genData->numEntries(); aa++){
cout << "method 1 Event " << aa << ":" << endl;
genData->get(aa)->Print("v");
obsCats = *genData->get(aa);
//genData->printMultiline(cout, 0, true);
cout << "logVal: " << model.getLogVal(&obsCats) << endl;
m1xs.push_back(obsCats.getRealValue("x"));
m1ts.push_back(obsCats.getRealValue("t"));
}
// look at method 2 data
vector<Double_t> m2xs;
vector<Double_t> m2ts;
cout << "method 2 generated " << genData2->numEntries() << " events"
<< endl;
for(int aa = 0; aa < genData2->numEntries(); aa++){
cout << "method 2 Event " << aa << ":" << endl;
genData2->get(aa)->Print("v");
obsCats = *genData2->get(aa);
//genData2->printMultiline(cout, 0, true);
cout << "logVal: " << model.getLogVal(&obsCats) << endl;
m2xs.push_back(obsCats.getRealValue("x"));
m2ts.push_back(obsCats.getRealValue("t"));
}
TGraph* m1Graph = new TGraph(m1xs.size(), &m1xs[0], &m1ts[0]);
TGraph* m2Graph = new TGraph(m2xs.size(), &m2xs[0], &m2ts[0]);
TCanvas* m1Canvas = new TCanvas();
m1Graph->SetMarkerStyle(9);
m1Graph->SetMarkerSize(.25);
m1Graph->Draw("ap");
m1Canvas->SaveAs("m1Points.png");
TCanvas* m2Canvas = new TCanvas();
m2Graph->SetMarkerStyle(9);
m2Graph->SetMarkerSize(.25);
m2Graph->Draw("ap");
m2Canvas->SaveAs("m2Points.png");
RooNLLVar* dataNLL = (RooNLLVar*)model.createNLL(*genData, Extended(true),
Verbose(true));
RooNLLVar* dataNLL2 = (RooNLLVar*)model.createNLL(*genData2,
Extended(true),
Verbose(true));
cout << "NLLVar value: " << dataNLL->getVal(&obsCats) << endl;
cout << "NLLVar2 value: " << dataNLL2->getVal(&obsCats) << endl;
} // end main