We are trying to split off a set of tuple files by binning on a particular parameter. I am finding that a macro to do this seems to break down for large tuple files (~2 GB), although it works fine for smaller files (e.g. 100 MB). By "break down" I mean that it slows to a crawl. Plan B is to read the entire tuple file n times, with a TCut for each bin.
Here is the macro. It goes through the tuple once, copying the relevant column to an array for sorting. It then reads the tuple back using the sort index and writes out files according to the binning of the parameter.
This 2.7 GB file hangs: ftp://ftp-glast.slac.stanford.edu/glast … doXrb.root
while this small one works fine: ftp://ftp-glast.slac.stanford.edu/glast … merit.root
I tried this with 5.12 on an Intel Mac and 5.10 on RHEL3 Linux, and saw the same behaviour on both.
// Unnamed ROOT macro. Pass 1 reads the PtMagLat branch of the MeritTuple chain
// into an array and sorts the entry indices with TMath::Sort. Pass 2 re-reads
// the chain in that sorted order and fills cloned trees, apparently starting a
// new output tree each time the value passes the current bin's upper edge.
// NOTE(review): several lines below look truncated (the text between a '<' and
// the next '>' appears to have been lost) and some string literals use
// typographic quotes, so this listing will not run as-is. Confirm every marked
// line against the original macro before use.
{
// Chain of input trees; only one input file is added, the second is commented out.
TChain* c = new TChain(“MeritTuple”);
c->Add("/nfs/farm/g/glast/u33/richard/DC2/v2/DC2-v2-merit-redoXrb.root");
// c->Add("/Users/Richard/GLAST/MicroQuasars/DC2/v2-Sky/allXrbPrune-merit.root");
// Common prefix and per-bin suffix used for the output tree name (see SetName below).
TString sRootName = “Merit_Bin_”;
TString sBinName;
TStopwatch timer;
// Per-phase and accumulated wall/CPU timings.
Double_t timeReal, timeCPU, timeRealTot = 0., timeCPUTot = 0.;
// Fixed-capacity buffers of 1e8 elements each: the values to sort and the
// resulting sort index. Assuming a 4-byte float and an 8-byte Long64_t this is
// roughly 400 MB + 800 MB.
// NOTE(review): no check of nEntries against this capacity is visible.
float val[100000000];
Long64_t index[100000000];
// Number of bins and the upper edge of each bin.
int nParamBins = 42;
float binUpper[42];
// NOTE(review): truncated line. The head of the bin-edge loop is fused with the
// tail of a SetBranchAddress call; the loop body (bin edge values), the
// declaration of PtMagLat and the object SetBranchAddress is called on are not
// visible here.
for (int i=0; iSetBranchAddress(“PtMagLat”,&PtMagLat);
Long64_t nEntries = c->GetEntries();
// Disable every branch, then re-enable only PtMagLat for the first pass.
c->SetBranchStatus("*", 0);
c->SetBranchStatus(“PtMagLat”, 1);
Long64_t jentry;
// NOTE(review): truncated line. The loop header is fused with a GetEntry(jentry)
// call; the loop bound is not visible (presumably nEntries - confirm).
for (jentry=0; jentryGetEntry(jentry);
// Store the absolute value of the parameter for this entry.
val[jentry] = fabs(PtMagLat);
};
cout << " Starting sort for " << nEntries << " entries" << endl;
timer.Start();
// Fill index[] with entry numbers ordered by val[]; the final argument 0 is the
// "down" flag, i.e. increasing order (consistent with the min/max printout below).
TMath::Sort(nEntries,val,index,0);
timer.Stop();
cout << "Min, max PtMagLat = " << val[index[0]] << " " << val[index[nEntries-1]] << endl;
// Record this phase's timings and add them to the running totals.
timeRealTot += timeReal = timer.RealTime();
timeCPUTot += timeCPU = timer.CpuTime();
// NOTE(review): truncated line. The end of the timing printout is fused with the
// tail of a call that re-enables all branches (SetBranchStatus("*", 1)); whatever
// sat between them (possibly declarations of f, t, oBin) is not visible.
cout << "Sort took (" << timeReal << "wall, " << timeCPU << "cpu) seconds to execute." <<endl>SetBranchStatus("*", 1);
// NOTE(review): truncated line. The loop header is fused with GetEntry(index[kentry]).
// Entries are visited in sorted-value order rather than file order, with all
// branches enabled; presumably this non-sequential access is what makes large
// inputs slow - confirm by profiling.
for (Long64_t kentry=0; kentryGetEntry(index[kentry]);
// NOTE(review): truncated line. A progress printout every 100000 entries is fused
// with what looks like the tail of a bin-boundary test against binUpper[oBin].
if (kentry%100000 == 0) cout << " entry " << kentry << " PtMagLat " << PtMagLat << " oBin " << oBin <<endl>binUpper[oBin]) {
// if a file is already open, close it
if (f!=0) {
t->AutoSave();
f->Close();
};
f = 0;
// NOTE(review): truncated line. The code between the printout and the trailing
// cd() (presumably advancing oBin, building sBinName and opening the next
// output file) is not visible.
cout << " Closed file at entry " << kentry << " PtMagLat " << PtMagLat << " oBin " << oBin <<endl>cd();
// Clone the chain's structure with zero entries copied, to be filled below.
t = c->CloneTree(0);
// Raise the maximum tree size to 1e10 (about 10 GB if the unit is bytes - confirm).
Long64_t maxFile = 10000000000LL;
t->SetMaxTreeSize(maxFile);
t->SetName(sRootName+sBinName);
};
// fill in what got read from the input chain
t->Fill();
};
// odds are the run did not end on a bin boundary, so there is still
// an open file.
if (f!=0) {
t->AutoSave();
f->Close();
};
// NOTE(review): no timer.Start() for this second phase is visible; it may have
// been in the lost text. Confirm, otherwise these figures repeat the sort timing.
timer.Stop();
cout << “processing time (wall, cpu) = (” << timer.RealTime() << ", " << timer.CpuTime() << “)s” << endl;
}