#include #include #include "TH2D.h" #include "TVectorD.h" #include "TVector2.h" #include "TMatrixD.h" #include "TPrincipal.h" #include "TBrowser.h" /*==PCA Test========================================================== The macro tests the Principal Component Analysis (PCA) class in ROOT. A known set of ten data points in two dimensions is given to the TPrincipal class. The results calcaulted by hand are printed below. Covariance Matrix of the data ------------------------------- | 0.616555556 | 0.615444444 | | 0.615444444 | 0.716555556 | ------------------------------- eigenvalues for the covariance matrix ---------------- | 0.049083398 | | 1.28402771 | ---------------- eigenvectors for the covariance matrix ------------------------------- | -0.73517866 | -0.67787340 | | 0.677873399 | -0.73517866 | ------------------------------- ====================================================================*/ /** * Print a matrix is a little nicer format than TMatrix::Print * @param name Name of the matrix (should not be too long) * @param m Matrix to print */ void printMatrix(const char* name, const TMatrixD& m) { TString n(name); Int_t nl = TMath::Max(n.Length(),16); printf(Form("%%%ds |", nl), name); for (Int_t i = 0; i < m.GetNcols(); i++) printf("%10d |", i); printf("\n"); for (Int_t i = 0; i < nl; i++) printf("-"); printf("-|"); for (Int_t i = 0; i < m.GetNcols(); i++) printf("-----------|"); for (Int_t i = 0; i < m.GetNrows(); i++) { printf(Form("\n%%%dd |", nl), i); for (Int_t j = 0; j < m.GetNcols(); j++) { printf(" %10f ", m(j,i)); } } printf("\n"); } /** * Print a vector * @param name Name of vector * @param v Vector to print */ void printVector(const char* name, const TVectorD& m) { TString n(name); Int_t nl = TMath::Max(n.Length(), 16); printf(Form("%%%ds\n", nl), name); for (Int_t i = 0; i < m.GetNrows(); i++) printf(Form("%%%dd | %%8f\n", nl), i, m(i)); } /** * Calculate the `real' coveriance matrix, and optionally scale * by the trace of the matrix * @param data Data to calculate the matrix for * @param n Number of data points * @param m Number of variables * @param norm Normalize to the trace */ void calculateCovarience(const Double_t data[][2], Int_t n, Int_t m, bool norm=true) { TVectorD mean(m); for (Int_t i = 0; i < n; i++) for (Int_t j = 0; j < m; j++) mean(j) += data[i][j]; for (Int_t j = 0; j < m; j++) mean(j) /= n; TMatrixD covar(m,m); for (Int_t i = 0; i < n; i++) for (Int_t j = 0; j < m; j++) for (Int_t k = 0; k < m; k++) covar(j,k) += (data[i][j] - mean(j)) * (data[i][k] - mean(k)); for (Int_t j = 0; j < m; j++) for (Int_t k = 0; k < m; k++) covar(j,k) /= (n-1); printf("=======================================================\n"); printMatrix("Calculated Covariance ", covar); if (norm) { Double_t trace = 0; for (Int_t i = 0; i < m; i++) trace += covar(i,i); covar *= 1/trace; printMatrix("Calculated Covariance / trace", covar); } TVectorD eigenV(m); TMatrixD eigenM = covar.EigenVectors(eigenV); printMatrix("Calculated Eigen vectors ", eigenM); printVector("Calculated Eigen values ", eigenV); } /** * Do a test of the TPrincipal class */ Int_t PCATest() { //--Data-- const Int_t numVariables = 2; const Int_t numDataPoints = 10; const Double_t inputdata[numDataPoints][numVariables] = { { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 }, { 2.3, 2.7 }, { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 } }; calculateCovarience(inputdata, numDataPoints, numVariables); //--Constants-- const Int_t numBinsX = 100; const Double_t minX = -1.0; const Double_t maxX = 4.0; TH2D* hXY = new TH2D("hXY","Orignal Data", numBinsX, minX, maxX, numBinsX, minX, maxX); TPrincipal* pcatest = new TPrincipal(numVariables, "D"); //--Main Algorithm-------------------------------------------------- //Add data to pcatest for (Int_t ii=0; iiFill(inputdata[ii][0], inputdata[ii][1]); pcatest->AddRow(inputdata[ii]); }//ii //Run PCA pcatest->MakePrincipals(); //Print the output printf("=======================================================\n"); pcatest->MakeHistograms("pcatest","XPES"); pcatest->Print("MS"); //Print out the final covariance matrix TMatrixD* finalCov = pcatest->GetCovarianceMatrix(); TMatrixD* finalEVM = pcatest->GetEigenVectors(); TVectorD* finalEVV = pcatest->GetEigenValues(); printMatrix("PCA coveriance ", *finalCov); printMatrix("PCA Eigen vectors ", *finalEVM); printVector("PCA Eigen values ", *finalEVV); // Print real matrix TMatrixD realCov(2,2); realCov(0,0) = 0.616555556; realCov(0,1) = 0.615444444; realCov(1,0) = 0.615444444; realCov(1,1) = 0.716555556; TMatrixD realEigen(2,2); realEigen(0,0) = -0.73517866; realEigen(0,1) = 0.677873399; realEigen(1,0) = -0.67787340; realEigen(1,1) = -0.73517866; TVectorD realValue(2); realValue(0) = 0.049083398; realValue(1) = 1.28402771; printf("=======================================================\n"); printMatrix("Real coveriance ", realCov); printMatrix("Real eigen vectors ", realEigen); printVector("Real eigen value ", realValue); //Draw the orignal data histogram hXY->SetMarkerStyle(2); hXY->Draw(); TArrow* a = new TArrow(); a->SetLineColor(kRed); a->SetFillColor(kRed); a->SetAngle(30); a->SetLineWidth(realValue(0) > realValue(1) ? 5 : 3); a->DrawArrow(0,0,realEigen(0,0),realEigen(1,0), 0.05, "|>"); a->SetLineWidth(realValue(0) <= realValue(1) ? 5 : 3); a->DrawArrow(0,0,realEigen(0,1), realEigen(1,1), 0.05, "|>"); a->SetLineStyle(2); a->SetLineColor(kBlue); a->SetFillColor(kBlue); a->SetLineWidth(finalEVV(0) > finalEVV(1) ? 4 : 2); a->DrawArrow(0,0,finalEVM(0,0),finalEVM(1,0), 0.05, "|>"); a->SetLineWidth(finalEVV(0) <= finalEVV(1) ? 4 : 2); a->DrawArrow(0,0,finalEVM(0,1),finalEVM(1,1), 0.05, "|>"); TLatex* text = new TLatex(); text->SetTextColor(kRed); text->DrawLatex(maxX/2, minX/2, "Red is reference"); text->SetTextColor(kBlue); text->DrawLatex(maxX/2, -minX/2, "Blue is from PCA"); //start a TBrowser TBrowser* p = new TBrowser("principalBrowser", pcatest); return 0; }//Int_t PCATest()