Hi,
I have two data sets from the same population. I need to create a histogram using the first data set and convert the second data set into percentile values using the first data set. Also while evaluating the second data set I need to keep updating the histogram with that data as well.
I know I can use TH1::Fill to keep adding data from the second set while I’m evaluating it, and I keep calling ComputeIntegral() to recompute the quantiles.
However, over time the underflow and overflow bins (below the lowest and above the highest regular bin) accumulate a large number of outliers. This makes the percentiles I calculate erroneous. I would like to adjust/move/shift the bin contents from time to time to bring the outliers back into the regular histogram bins. How can this be done?
Attaching the code here in case you want to see what I’m doing. Please follow the comments in BOLD to get a quick idea.
Thanks in advance.
Cheers
Arin
[size=85][code]
//THE FIRST DATA SET, THIS IS USED TO COMPUTE THE FIRST HISTOGRAM
vector<TDataReader> &histDataInputVector = DataInputVectorArr[0];
//READING THE VALUES INTO AN ARRAY
deque<Double_t> valsArray;
TSimpleEvent firstEvnt = histDataInputVector[0].ReadEvent(0);
for(int i=0; i<histDataInputVector.size(); i++)
{
Long64_t count = histDataInputVector[i].GetEventCount();
for(Long64_t j=0; j < count; j++)
{
TSimpleEvent evnt = histDataInputVector[i].ReadEvent(j);
if(!(evnt==TSimpleEvent()))
{
Double_t value = evnt.Val;
valsArray.push_back(value);
}
}
}
//COMPUTING MEAN AND STANDARD DEVIATION
Double_t Mean = TMath::Mean(valsArray.begin(), valsArray.end());
Double_t StdDev = TMath::RMS(valsArray.begin(), valsArray.end());
//SETTING THE HISTOGRAM MIN AND MAX AS histMinStdDevs and
//histMaxStdDevs STANDARD DEVIATIONS BELOW AND ABOVE THE MEAN
Double_t minVal = Mean-histMinStdDevsStdDev;
if(minVal<0)minVal=0;
Double_t maxVal = Mean+histMaxStdDevsStdDev;
//CREATING THE FIRST CUT OF THE HISTOGRAM WITH noBins BINS
TH1D * histogram = new TH1D(“h1”,“Data”,noBins,minVal,maxVal);
for(int j=0;j<valsArray.size();j++)
{
histogram->Fill(valsArray[j]);
}
//TARGET DATA SET THAT NEEDS TO BE EVALUATED WHILE ALSO
//ADDED TO THE HISTOGRAM
vector<TDataReader> &DataInputVector = DataInputVectorArr[1];
for(int i=0; i<DataInputVector.size(); i++)
{
//GET THE HISTOGRAM INTEGRAL
Double_t * integrals = histogram->GetIntegral();
Long64_t inputcount = DataInputVector[i].GetEventCount();
for(Long64_t j=0; j < inputcount; j++)
{
TSimpleEvent CurrentEvnt = DataInputVector[i].ReadEvent(j);
if(!(CurrentEvnt==TSimpleEvent()))
{
TSimpleEvent PercentileEvent;
Int_t binNo = histogram->FindBin(CurrentEvnt.Val);
PercentileEvent.Val=(integrals[binNo]>1)?1:integrals[binNo];
DataOutputVector[i].WriteEvent(PercentileEvent);
//ADD THE NEW DATA TO THE HISTOGRAM
histogram->Fill(CurrentEvnt.Val);
}
else
{
ERROR("Current Event is NULL, MA calculation in inconsistent state... exiting!");
return;
}
}
//AFTER EACH 2ND DIMENTION ITERATION RECOMPUTE THE INTEGRAL
histogram->ComputeIntegral();
//WOULD ALSO LIKE TO REDISTRIBUTE THE OUTLIERS
//BACK INTO THE HISTOGRAM HERE, BUT HOW???
}
[/code][/size]