// Merge bins of a histogram until all bins have a similar bin count. // NOTE that this algorithm adds the bin contents of merged bins: it // does not preserve the integral! A similar algorithm can be used // for an integral preserving merge. // Author: Axel Naumann, 2010, based on an algorithm invented by Jakob Blomer #include "TMath.h" #include "TH1.h" #include "TCanvas.h" class TMergeLookAhead { public: TMergeLookAhead(double targetContent, const TH1* hist): fNbins(hist->GetNbinsX()), fTargetContent(targetContent), fXaxis(hist->GetXaxis()) { int bufsize = fNbins * (fNbins + 1) / 2; fContent = new double[bufsize]; fCost = new float[bufsize]; fDivision = new int[bufsize]; int offset = Idx(1, 0); for (int i = 0; i < fNbins; ++i) { fContent[offset + i] = hist->GetBinContent(i + 1); fCost[offset + i] = ContentCost(fContent[offset + i]); fDivision[offset + i] = 0; } } ~TMergeLookAhead() { delete []fCost; delete []fContent; delete []fDivision; } void Fill() { for (int span = 2; span < fNbins; ++span) Fill(span); Fill(fNbins, 0); } int Idx(int span, int left) { return (fNbins - span)*(fNbins - span + 1) / 2 + left; } float& Cost(int idx) { return fCost[idx]; } double& Content(int idx) { return fContent[idx]; } int& Division(int idx) { return fDivision[idx]; } float ContentCost(double c) { return fabs(fTargetContent - c); } float CombinedCost(double c1, double c2) { return max(c1, c2); } void GetBinEdges(std::vector& vBinEdges, std::vector& vContent) { vBinEdges.push_back(fXaxis->GetBinLowEdge(1)); GetBinEdges(vBinEdges, vContent, fNbins, 0); vBinEdges.push_back(fXaxis->GetBinUpEdge(fNbins)); } protected: void Fill(int span, int left); void Fill(int span) { for (int left = 0; left <= fNbins - span; ++left) Fill(span, left); } void GetBinEdges(std::vector& vBinEdges, std::vector& vBinContent, int span, int left); private: int fNbins; float *fCost; double *fContent; int *fDivision; double fTargetContent; const TAxis* fXaxis; }; void TMergeLookAhead::Fill(int span, int left) { // Fill the look ahead buffer: // sweep divider from left to right double combContent = Content(Idx(1, left)) + Content(Idx(span - 1, left + 1)); float optCost = ContentCost(combContent); int optDiv = 0; for (int i = 1; i < span; ++i) { float cost = CombinedCost(Cost(Idx(i, left)), Cost(Idx(span - i, left + i))); if (cost < optCost) { optCost = cost; optDiv = i; } } int idx = Idx(span, left); Division(idx) = optDiv; Cost(idx) = optCost; Content(idx) = combContent; } void TMergeLookAhead::GetBinEdges(std::vector& vBinEdges, std::vector& vBinContent, int span, int left) { // Collect the bin edges from the look-ahead buffer for the sub-range spanning // "span" bins and starting at bin index "left". // If needed, "left" and 'right' is already part of vBinEdges from previous recursion. // Return the content of the right bin. int idx = Idx(span, left); int division = Division(idx); if (division == 0 || span == 1) { vBinContent.push_back(Content(idx)); return; } else { GetBinEdges(vBinEdges, vBinContent, division, left); vBinEdges.push_back(fXaxis->GetBinLowEdge(left + division + 1)); GetBinEdges(vBinEdges, vBinContent, span - division, left + division); } } void mergebins() { TCanvas* canv = new TCanvas(); canv->Divide(1,2); canv->cd(1); const int nBins = 1000; const double low = -2; const double high = 10; const int entries = 100000; TH1D* h = new TH1D("h", "h", nBins, low, high); h->FillRandom("gaus", entries); h->Draw(); // target number of bins: const int numTargetBins = 10; // merge until all bins have a bin content > targetBinContent. const double targetBinContent = entries / numTargetBins; TMergeLookAhead ml(targetBinContent, h); ml.Fill(); std::vector vbinx; std::vector vbinc; ml.GetBinEdges(vbinx, vbinc); TH1D* hVarBins = new TH1D("hvb", "optimized bins", (int)vbinc.size(), &vbinx[0]); for (UInt_t i = 0, n = vbinx.size(); i < n; ++i) { hVarBins->SetBinContent(i + 1, vbinc[i]); } canv->cd(2); hVarBins->SetMinimum(0); hVarBins->Draw(); }