Roofit problem: normalization of 2d hist pdf

cross posting from root discussion - this is a more apropriate forum

Hi,
I am trying to create a 2d histogram pdf using RooFit. It seems that the normalization gets messed up.

I load a tree with about 1m entries from a file. Tree has 2 observables trtfrac and nblhits.
I make RooDataSet out of this and then make a RooDataHist out of this, and finally I make RooHistPdf. The trtfrac variable range is from 0.0 to 1.0 in 10 bins. and nblhits goes from 0 to 3 in 3 bins. Then I try to plot the projection of the pdf on both observables together with the dataset that the pdf was built from. I expect the two to lie exactly on top of one another. But - I get about factor of 10 difference - the pdf is data x 10. Also when I ask the pdf to give me a 2d histogram and I integrate that I get 0.1 instead of 1. These two factors are linked to the bin size in the trtfrac. So when I bin it twice as fine I get factor of 20 discrepancy in data plotted with the pdf and the pdf integral becomes 0.05.

I attach the relevant bit of code and 3 plots

RooRealVar trtfrac(“trtfrac”,“fraction of high threshold TRT hits”,0.0,1.0);
trtfrac.setBinning(RooBinning(10,0.0,1.0));

RooRealVar nblhits(“nblhits”,“number of B layer hits”,0.0,3.0);
RooBinning nblhits_bin(3,0.0,3.0);
//nblhits_bin.addBoundary(1.0); nblhits_bin.addBoundary(2.0);
nblhits.setBinning(nblhits_bin);

cerr<<“open file get tree: “<<sig_file_name<<” “<<sig_tree_name<<”\n”;
TFile* f_sig=new TFile(sig_file_name.c_str());
TTree* t_sig_trtfrac_nblhits=(TTree*)f_sig->Get(sig_tree_name.c_str());
//“t_trtfrac_nblhits_minimal_Et2p5GeV_fake”);

cerr<<“sig file tree: “<<f_sig<<” “<<t_sig_trtfrac_nblhits<<”\n”;

RooDataSet sig_data(“sig_data”,“signal training dataset”,
t_sig_trtfrac_nblhits,RooArgSet(trtfrac,nblhits));
cerr<<“number of entries in the dataset: “<<sig_data.numEntries()<<”\n”;

RooDataHist hist_sig_trtfrac_nblhits(“hist_sig_trtfrac_nblhits”,“Number of B Layer hits VS TR fraction”, RooArgList(trtfrac,nblhits),sig_data);
//hist_sig_trtfrac_nblhits.add(sig_data);

cerr<<“num entries: “<<t_sig_trtfrac_nblhits->GetEntries()<<”\n”;

std::cerr<<“making the pdf!\n”;

RooHistPdf sig_PDF(“sig_PDF”,“signal PDF”,RooArgSet(trtfrac,nblhits),hist_sig_trtfrac_nblhits);

std::cerr<<“done\n”;
TCanvas* c_sig_frame=new TCanvas(“c_sig_frame”,“c_sig_frame”,800,600);
c_sig_frame->cd();

TH2F* sig_PDF_histo=(TH2F*)sig_PDF.createHistogram(“sig-trtfrac-nblfrac”,trtfrac,YVar(nblhits));
sig_PDF_histo->Draw(“COLZ”);
c_sig_frame->Print(“sig_PDF.eps”,“eps”);

RooPlot* sig_frame_trtfrac=trtfrac.frame();
RooPlot* sig_frame_nblhits=nblhits.frame();

//hist_sig_trtfrac_nblhits.plotOn(sig_frame_trtfrac);
sig_data.plotOn(sig_frame_trtfrac);
sig_PDF.plotOn(sig_frame_trtfrac);
sig_frame_trtfrac->Draw();
c_sig_frame->Print(“sig_PDF_trtfrac.eps”,“eps”);
//hist_sig_trtfrac_nblhits.plotOn(sig_frame_nblhits);
sig_data.plotOn(sig_frame_nblhits,Binning(nblhits_bin));
sig_PDF.plotOn(sig_frame_nblhits);
sig_frame_nblhits->Draw();
c_sig_frame->Print(“sig_PDF_nblhits.eps”,“eps”);
cerr<<“Integral of the 2d sig PDF histo: “<<
sig_PDF_histo->Integral(“width”)<<”\n”;

any help greatly appreciated!!!

also:
root version:
ROOT 5.22/00h (branches/v5-22-00-patches@32279, Feb 16 2010, 09:13:00 on linux)
RooFit v2.95

I also tried with root 2.26 RooFit v3.12

thanks!
Wojtek





Ok this is clearly a bug. somewhat diappointed…

I ‘scaled’ one of the variables so that it has a natural unit-size bin (the other variable already did). So now bin area is 1. And now things look ok - the histogram returned by the pdf has integral of 1 and data and the projection of pdf created from that data match. This will be super clumsy for my analysis… I also wanted to have larger bins at the tails but this clearly won’t work either… bummer…

I attach modified code and a plot

RooRealVar trtfrac(“trtfrac”,“fraction of high threshold TRT hits”,0.0,10.0);
trtfrac.setBinning(RooBinning(10,0.0,10.0));

RooRealVar nblhits(“nblhits”,“number of B layer hits”,0.0,3.0);
RooBinning nblhits_bin(10,0.0,10.0);
//nblhits_bin.addBoundary(1.0); nblhits_bin.addBoundary(2.0);
nblhits.setBinning(nblhits_bin);

cerr<<“open file get tree: “<<sig_file_name<<” “<<sig_tree_name<<”\n”;
TFile* f_sig=new TFile(sig_file_name.c_str());
TTree* t_sig_trtfrac_nblhits=(TTree*)f_sig->Get(sig_tree_name.c_str());
//“t_trtfrac_nblhits_minimal_Et2p5GeV_fake”);

cerr<<“sig file tree: “<<f_sig<<” “<<t_sig_trtfrac_nblhits<<”\n”;

RooDataSet sig_data(“sig_data”,“signal training dataset”,
//t_sig_trtfrac_nblhits,
RooArgSet(trtfrac,nblhits));

TH2F* temp_hist_sig_trtfrac_nblhits
=new TH2F(“temp_hist_sig_trtfrac_nblhits”,
“temp_hist_sig_trtfrac_nblhits”,
10,0.0,10.0,10,0.0,10.0);
TLeaf* l_sig_trtfrac=t_sig_trtfrac_nblhits->GetLeaf(“trtfrac”);
TLeaf* l_sig_nblhits=t_sig_trtfrac_nblhits->GetLeaf(“nblhits”);
for(int ientry=0;ientry< t_sig_trtfrac_nblhits->GetEntries();ientry++){
t_sig_trtfrac_nblhits->GetEntry(ientry);
temp_hist_sig_trtfrac_nblhits->Fill(10.0l_sig_trtfrac->GetValue(),
l_sig_nblhits->GetValue());
trtfrac=10.0
l_sig_trtfrac->GetValue();
nblhits=l_sig_nblhits->GetValue();
sig_data.add(RooArgSet(trtfrac,nblhits));
}
temp_hist_sig_trtfrac_nblhits
->Scale(1.0/temp_hist_sig_trtfrac_nblhits->Integral(“width”));

cerr<<“number of entries in the dataset: “<<sig_data.numEntries()<<”\n”;

RooDataHist hist_sig_trtfrac_nblhits(“hist_sig_trtfrac_nblhits”,
“Number of B Layer hits VS TR fraction”,
//RooArgList(trtfrac,nblhits),sig_data);
RooArgList(trtfrac,nblhits),
temp_hist_sig_trtfrac_nblhits);
RooDataHist hist_sig_trtfrac(“hist_sig_trtfrac”,“Number of TR fraction”, RooArgList(trtfrac),sig_data);
RooDataHist hist_sig_nblhits(“hist_sig_nblhits”,“Number of B Layer hits”, RooArgList(nblhits),sig_data);
//hist_sig_trtfrac_nblhits.add(sig_data);

cerr<<“num entries: “<<t_sig_trtfrac_nblhits->GetEntries()<<”\n”;

std::cerr<<“making the pdf!\n”;

RooHistPdf sig_PDF(“sig_PDF”,“signal PDF”,RooArgSet(trtfrac,nblhits),hist_sig_trtfrac_nblhits);

RooHistPdf sig_1dtrtfrac_PDF(“sig_1dtrtfrac_PDF”,“signal PDF”,RooArgSet(trtfrac),hist_sig_trtfrac);
RooHistPdf sig_1dnblhits_PDF(“sig_1dnblhits_PDF”,“signal PDF”,RooArgSet(nblhits),hist_sig_nblhits);

std::cerr<<“done\n”;
TCanvas* c_sig_frame=new TCanvas(“c_sig_frame”,“c_sig_frame”,800,600);
c_sig_frame->cd();

TH2F* sig_PDF_histo=(TH2F*)sig_PDF.createHistogram(“sig-trtfrac-nblfrac”,trtfrac,YVar(nblhits));
sig_PDF_histo->Draw(“COLZ”);
c_sig_frame->Print(“sig_PDF.eps”,“eps”);

RooPlot* sig_frame_trtfrac=trtfrac.frame();
RooPlot* sig_frame_nblhits=nblhits.frame();

//hist_sig_trtfrac_nblhits.plotOn(sig_frame_trtfrac);
sig_data.plotOn(sig_frame_trtfrac);
sig_PDF.plotOn(sig_frame_trtfrac,Project(nblhits));
sig_frame_trtfrac->Draw();
c_sig_frame->Print(“sig_PDF_trtfrac.eps”,“eps”);
//hist_sig_trtfrac_nblhits.plotOn(sig_frame_nblhits);
sig_data.plotOn(sig_frame_nblhits,Binning(nblhits_bin));
sig_PDF.plotOn(sig_frame_nblhits,Project(trtfrac));
sig_frame_nblhits->Draw();
c_sig_frame->Print(“sig_PDF_nblhits.eps”,“eps”);
cerr<<“Integral of the 2d sig PDF histo: “<<
sig_PDF_histo->Integral(“width”)<<”\n”;
cerr<<“Integral from the represented 2d histo: “<<sig_PDF.dataHist().sum(1)
<<”\n”;

Hi Wojtek,

This looks like a bug indeed. I’ll fix this, but I’ll only get back to you after next week as I am traveling. In the mean time you can the following

sig_PDF.forceNumInt(kTRUE)

which will force a numeric integration to be used to calculated the integral.
This will certaintly be slower, but should give the correct answer. You might
to try his in ROOT 5.26 rather than in ROOT 5.22 as the former has newer and better numeric nD integration algorithms interfaced

Wouter

Hi,
Ok I tried and the PDF looked right when the binning is equidistant, however when I try to adjust the spacing of the bins so that PDF is not zero even in the tails - the PDF goes crazy. The histogram returned by the pdf also does not integrate to 1
:frowning:
I attach the code snip and 3 plots illustrating the problem
can anyone help? This seems like a very basic issue…
Again - I am comparing the pdf to the exact dataset that the pdf has been derived from so I expect the two to match perfectly…

Thanks a lot,
Wojtek

RooRealVar trtfrac(“trtfrac”,“fraction of high threshold TRT hits”,0.0,1.0);
RooBinning trtfrac_bin(0.0,1.0);
trtfrac_bin.addBoundary(0.1);
trtfrac_bin.addBoundary(0.2);
trtfrac_bin.addBoundary(0.3);
trtfrac.setBinning(trtfrac_bin);

RooRealVar nblhits(“nblhits”,“number of B layer hits”,0.0,10.0);
RooBinning nblhits_bin(0.0,10.0);
nblhits_bin.addBoundary(1.0); nblhits_bin.addBoundary(2.0);
nblhits.setBinning(nblhits_bin);

cerr<<“open file get tree: “<<sig_file_name<<” “<<sig_tree_name<<”\n”;
TFile* f_sig=new TFile(sig_file_name.c_str());
TTree* t_sig_trtfrac_nblhits=(TTree*)f_sig->Get(sig_tree_name.c_str());
//“t_trtfrac_nblhits_minimal_Et2p5GeV_fake”);

cerr<<“sig file tree: “<<f_sig<<” “<<t_sig_trtfrac_nblhits<<”\n”;

RooDataSet sig_train_data(“sig_train_data”,“signal training dataset”,
t_sig_trtfrac_nblhits,
RooArgSet(trtfrac,nblhits));

// TH2F* temp_hist_sig_trtfrac_nblhits
// =new TH2F(“temp_hist_sig_trtfrac_nblhits”,
// “temp_hist_sig_trtfrac_nblhits”,
// 10,0.0,10.0,10,0.0,10.0);
// TLeaf* l_sig_trtfrac=t_sig_trtfrac_nblhits->GetLeaf(“trtfrac”);
// TLeaf* l_sig_nblhits=t_sig_trtfrac_nblhits->GetLeaf(“nblhits”);
// for(int ientry=0;ientry< t_sig_trtfrac_nblhits->GetEntries();ientry++){
// t_sig_trtfrac_nblhits->GetEntry(ientry);
// temp_hist_sig_trtfrac_nblhits->Fill(10.0l_sig_trtfrac->GetValue(),
// l_sig_nblhits->GetValue());
// trtfrac=10.0
l_sig_trtfrac->GetValue();
// nblhits=l_sig_nblhits->GetValue();
// sig_train_data.add(RooArgSet(trtfrac,nblhits));
// }
// temp_hist_sig_trtfrac_nblhits
// ->Scale(1.0/temp_hist_sig_trtfrac_nblhits->Integral(“width”));

cerr<<“number of entries in the dataset: “<<sig_train_data.numEntries()<<”\n”;

RooDataHist hist_sig_trtfrac_nblhits(“hist_sig_trtfrac_nblhits”,
“Number of B Layer hits VS TR fraction”,
RooArgList(trtfrac,nblhits),sig_train_data);
//RooArgList(trtfrac,nblhits),
//temp_hist_sig_trtfrac_nblhits);
RooDataHist hist_sig_trtfrac(“hist_sig_trtfrac”,“Number of TR fraction”, RooArgList(trtfrac),sig_train_data);
RooDataHist hist_sig_nblhits(“hist_sig_nblhits”,“Number of B Layer hits”, RooArgList(nblhits),sig_train_data);
//hist_sig_trtfrac_nblhits.add(sig_train_data);

cerr<<“num entries: “<<t_sig_trtfrac_nblhits->GetEntries()<<”\n”;

std::cerr<<“making the pdf!\n”;

RooHistPdf sig_PDF(“sig_PDF”,“signal PDF”,RooArgSet(trtfrac,nblhits),hist_sig_trtfrac_nblhits);
sig_PDF.forceNumInt(kTRUE);

RooHistPdf sig_1dtrtfrac_PDF(“sig_1dtrtfrac_PDF”,“signal PDF”,RooArgSet(trtfrac),hist_sig_trtfrac);
RooHistPdf sig_1dnblhits_PDF(“sig_1dnblhits_PDF”,“signal PDF”,RooArgSet(nblhits),hist_sig_nblhits);

std::cerr<<“done\n”;
TCanvas* c_sig_frame=new TCanvas(“c_sig_frame”,“c_sig_frame”,800,600);
c_sig_frame->cd();

TH2F* sig_PDF_histo=(TH2F*)sig_PDF.createHistogram(“sig-trtfrac-nblfrac”,trtfrac,YVar(nblhits));
sig_PDF_histo->Draw(“COLZ”);
c_sig_frame->Print(“sig_PDF.eps”,“eps”);

RooPlot* sig_frame_trtfrac=trtfrac.frame();
RooPlot* sig_frame_nblhits=nblhits.frame();

//hist_sig_trtfrac_nblhits.plotOn(sig_frame_trtfrac);
sig_train_data.plotOn(sig_frame_trtfrac,Binning(trtfrac_bin));
sig_PDF.plotOn(sig_frame_trtfrac);
sig_frame_trtfrac->Draw();
c_sig_frame->Print(“sig_PDF_trtfrac.eps”,“eps”);
//hist_sig_trtfrac_nblhits.plotOn(sig_frame_nblhits);
sig_train_data.plotOn(sig_frame_nblhits,Binning(nblhits_bin));
sig_PDF.plotOn(sig_frame_nblhits);
sig_frame_nblhits->Draw();
c_sig_frame->Print(“sig_PDF_nblhits.eps”,“eps”);
cerr<<“Integral of the 2d sig PDF histo: “<<
sig_PDF_histo->Integral(“width”)<<”\n”;
cerr<<“Integral from the represented 2d histo: “<<sig_PDF.dataHist().sum(1)
<<”\n”;





Hi,

I just post to confirm that I will still look at this. (I was away longer due to ash cloud)

Wouter

anyone…?

Hi,

I’ve found a bug in the analytical calculation of partial integrals in RooHistPdf.
The fix for this has been introduced in ROOT 5.27/04 (due this week).

Wouter