function nma_PCAaccuracy
clear all
close all
pause(1);
REMOVE_MEAN_FROM_RAW_DATA = 1;
USE_BLADDER_DATA = 0;
PROJ_ON_TUMOR = 1;
PROJ_ON_NORMAL = 2;
DO_CUT_OF_TEST = 1;
NTEST = 10;
ALL = 0;
NMODE = 3;
nfig = 1;
if DO_CUT_OF_TEST
CUTOFF = 0.05:0.20:0.95;
else
CUTOFF = .70:.70;
end
if USE_BLADDER_DATA
trueNormalIndex = 103:124 ;
trueTumorIndex = [1:6,8:102,125,126];
else
trueTumorIndex = [4,9:112];
trueNormalIndex = 116:191;
end
if USE_BLADDER_DATA
fprintf('***** BLADDER DATA ANALYSIS ********\n');
xraw = load('imputed_bladder.txt');
else
fprintf('***** LIVER DATA ANALYSIS ********\n');
xraw = load('imputed_liver.txt');
end
if REMOVE_MEAN_FROM_RAW_DATA
nrx=size(xraw,1);
for i=1:nrx
xraw(i,:)=xraw(i,:)-mean(xraw(i,:));
end
end
nCutOff=length(CUTOFF);
tpt=zeros(nCutOff,1);
npt=tpt; tpn=tpt; npn=tpt; tpMixed=tpt; npMixed=tpt;
for z=1:nCutOff
projectAndAnalyse(CUTOFF(z));
end
function projectAndAnalyse(currentCutOff)
fprintf('\n========> Number of trails=%d\n',NTEST);
fprintf('percentage of samples used in generation of eigensample is =%3.3f%%\n',...
(currentCutOff)*100);
fprintf('Number of eigenmode=%d\n\n',NMODE);
tumorSetSize = length(trueTumorIndex);
normalSetSize = length(trueNormalIndex);
workingPopulationTumorFirst = xraw(:,[trueTumorIndex trueNormalIndex]);
sizeOfExcludedSet = floor((1-currentCutOff)*length(trueTumorIndex));
projMatOnTumor = zeros( tumorSetSize+normalSetSize, NMODE, NTEST);
projMatOnTumor= process_2(workingPopulationTumorFirst,...
tumorSetSize,...
projMatOnTumor,...
sizeOfExcludedSet);
if ALL
nfig=analyzeProjectionMatrix(projMatOnTumor,...
tumorSetSize,...
PROJ_ON_TUMOR,...
nfig);
end
workingPopulationNormalFirst = xraw(:,[trueNormalIndex trueTumorIndex]);
sizeOfExcludedSet = floor((1-currentCutOff)*length(trueNormalIndex));
projMatOnNormal = zeros(tumorSetSize+normalSetSize, NMODE, NTEST);
projMatOnNormal = process_2(workingPopulationNormalFirst ,...
length(trueNormalIndex),...
projMatOnNormal,...
sizeOfExcludedSet);
B=projMatOnNormal([normalSetSize+1:end,1:normalSetSize],:,:);
if ALL
nfig=analyzeProjectionMatrix(B, ...
length(trueNormalIndex),...
PROJ_ON_NORMAL,...
nfig);
end
[tpt(z),npt(z),tpn(z),npn(z),tpMixed(z),npMixed(z)]=...
doStats(projMatOnTumor,B,tumorSetSize);
end
figure;
plot(100*CUTOFF,tpt,'r')
hold on;
plot(100*CUTOFF,tpn,'k')
plot(100*CUTOFF,tpMixed,'b')
legend('projection against tumor','projection against normal','projection against combination');
title(sprintf('Liver data: accuracy of detection of cancer a function of the size\nof the random set used to generate dominant component'));
xlabel(sprintf('%% of the population used to generate the dominant component'));
ylabel('accuracy of detecting that a sample is cancerous');
ylim([-1 103]);
print -dtiff bladder_detect_cancer_all_algorithms
figure;
plot(100*CUTOFF,npt,'r')
hold on;
plot(100*CUTOFF,npn,'k'); hold on;
plot(100*CUTOFF,npMixed,'b');
legend('projection against tumor','projection against normal','projection against combination');
title(sprintf('Liver data: accuracy of detection of cancer-free as a function of the size\n of the random set used to generate dominant component'));
xlabel(sprintf('%% of the population used to generate the dominant component'));
ylabel('accuracy of detecting that a sample is cancer-free');
ylim([-1 103]);
print -dtiff bladder_detect_cancer_free_all_algorithms
if 1
figure;
projMat=squeeze(B(:,1,:));
[nr,nc]=size(projMat);
pltProjMat=projMat;
pltProjMat(nr+1,:)=pltProjMat(nr,:);
pltProjMat(:,nc+1)=pltProjMat(:,nc);
pcolor(pltProjMat)
colorbar
xlabel('Case Number')
set(gca,'XTick',10:10:100);
ylabel('Sample number')
title('Projections on normal Component')
figure;
projMat=squeeze(projMatOnTumor(:,1,:));
[nr,nc]=size(projMat);
pltProjMat=projMat;
pltProjMat(nr+1,:)=pltProjMat(nr,:);
pltProjMat(:,nc+1)=pltProjMat(:,nc);
pcolor(pltProjMat)
colorbar
xlabel('Case Number')
set(gca,'XTick',10:10:100);
ylabel('Sample number')
title('Projections on tumor Component')
end
end
function [tpt,npt,tpn,npn,tpMixed,npMixed]=doStats(...
projMatOnTumor,projMatOnNormal,tumorSetSize)
maxNumberOfModes=size(projMatOnTumor,2);
for i=1:maxNumberOfModes
fprintf('\n***** results for number of dominant modes of %d\n',i);
[tpt,npt]=checkProjectionOnTumor(projMatOnTumor,...
tumorSetSize,...
i);
fprintf('result of classification of sample based on correlation with Tumor mode\n');
fprintf('Correct tumor detect=%%%3.2f, Correct normal detect=%%%3.2f\n',tpt,npt);
[tpn,npn]=checkProjectionOnNormal(projMatOnNormal,tumorSetSize,i);
fprintf('\nresult of classification of sample based on correlation with normal mode\n');
fprintf('Correct tumor detect=%%%3.2f, Correct normal detect=%%%3.2f\n',tpn,npn);
[tpMixed,npMixed,nUndecided]=checkProjectionOnCombination(projMatOnTumor,...
projMatOnNormal,tumorSetSize,i);
fprintf('\nresult of classification of sample based on correlation with combination of modes\n');
fprintf('Correct tumor detect=%%%3.2f, Correct normal detect=%%%3.2f, Unable to classify=%%%3.2f\n',...
tpMixed,npMixed,nUndecided);
end
end
function [tumorSamplesDetectOkCount,normalSamplesDetectOkCount]=...
checkProjectionOnTumor(projMatOnTumor,tumorSetSize,nMode)
tumorSamplesDetectOkCount = 0;
normalSamplesDetectOkCount = 0;
nCases = size(projMatOnTumor,3);
totalSampleSize = size(projMatOnTumor,1);
for i = 1:nCases
for j = 1:totalSampleSize
pOnTumor = 0;
for k=1:nMode
pOnTumor=pOnTumor+projMatOnTumor(j,k,i);
end
if pOnTumor>0
if j<=tumorSetSize
tumorSamplesDetectOkCount = 1+ tumorSamplesDetectOkCount;
end
else
if j>tumorSetSize
normalSamplesDetectOkCount = 1+ normalSamplesDetectOkCount;
end
end
end
end
tumorSamplesDetectOkCount=tumorSamplesDetectOkCount*100/(nCases*tumorSetSize);
normalSamplesDetectOkCount=normalSamplesDetectOkCount*100/(nCases*(totalSampleSize-tumorSetSize));
end
function [tumorSamplesDetectOkCount,normalSamplesDetectOkCount]=...
checkProjectionOnNormal(projMatOnNormal,tumorSetSize,nMode)
tumorSamplesDetectOkCount = 0;
normalSamplesDetectOkCount = 0;
nCases = size(projMatOnNormal,3);
totalSampleSize = size(projMatOnNormal,1);
for i = 1:nCases
for j = 1:totalSampleSize
pOnNormal = 0;
for k = 1:nMode
pOnNormal = pOnNormal+projMatOnNormal(j,k,i);
end
if pOnNormal>0
if j>tumorSetSize
normalSamplesDetectOkCount = 1+ normalSamplesDetectOkCount;
end
else
if j<=tumorSetSize
tumorSamplesDetectOkCount = 1+ tumorSamplesDetectOkCount;
end
end
end
end
tumorSamplesDetectOkCount = tumorSamplesDetectOkCount*100/(nCases*tumorSetSize);
normalSamplesDetectOkCount = normalSamplesDetectOkCount*100/(nCases*(totalSampleSize-tumorSetSize));
end
function [tumorSamplesDetectOkCount,normalSamplesDetectOkCount,notDetectedCount]=...
checkProjectionOnCombination(projMatOnTumor,...
projMatOnNormal,...
tumorSetSize,...
nMode)
nCases = size(projMatOnTumor,3);
totalSampleSize = size(projMatOnTumor,1);
normalSetSize = totalSampleSize-tumorSetSize;
tumorSamplesDetectOkCount = 0;
normalSamplesDetectOkCount = 0;
tumorNotSure = 0;
normalNotSure = 0;
for i = 1:nCases
result=zeros(totalSampleSize,1);
for j = 1:totalSampleSize
pOnTumor = 0;
for k = 1:nMode
pOnTumor = pOnTumor+projMatOnTumor(j,k,i);
end
pOnNormal = 0;
for k=1:nMode
pOnNormal = pOnNormal+projMatOnNormal(j,k,i);
end
if pOnTumor>0 || (pOnTumor-pOnNormal)>0
result(j) = 1;
elseif pOnNormal>0
result(j) = 0;
else
result(j) = -1;
end
end
tumorSamplesDetectOkCount = tumorSamplesDetectOkCount + sum(result(1:tumorSetSize)==1);
normalSamplesDetectOkCount = normalSamplesDetectOkCount+sum(result(tumorSetSize+1:end)==0);
tumorNotSure = tumorNotSure + sum( result(1:tumorSetSize) == -1);
normalNotSure = normalNotSure + sum( result(tumorSetSize+1:end) == -1);
end
tumorSamplesDetectOkCount = tumorSamplesDetectOkCount*100/(nCases*tumorSetSize-tumorNotSure);
normalSamplesDetectOkCount = normalSamplesDetectOkCount*100/(nCases*normalSetSize-normalNotSure);
notDetectedCount = (tumorNotSure+normalNotSure)*100/(nCases*totalSampleSize);
end
function plotProjection(B,nFig)
figure(nFig);
projMat=squeeze(B(:,1,:));
[nr,nc]=size(projMat);
pltProjMat=projMat;
pltProjMat(nr+1,:)=pltProjMat(nr,:);
pltProjMat(:,nc+1)=pltProjMat(:,nc);
pcolor(pltProjMat)
colorbar
xlabel('Case Number')
set(gca,'XTick',10:10:100);
ylabel('Sample number')
end
function projMat=process_2(xraw,...
tumorSetSize,...
projMat,...
sizeOfExcludedSet)
if sizeOfExcludedSet>=tumorSetSize
error('projMat: working set size is too small.');
end
ntests = size(projMat,3);
for k = 1:ntests
if mod(k,50)==0
fprintf('case # %d\n',k);
end
z = randperm(tumorSetSize);
primaryTumorIndex = z(sizeOfExcludedSet+1:end);
projMat = getProjectionMatrix(xraw(:,primaryTumorIndex), ...
xraw, ...
k, ...
projMat);
end
end
function s=getTitlePrefix(projectionType)
PROJ_ON_TUMOR = 1;
PROJ_ON_NORMAL = 2;
if projectionType==PROJ_ON_TUMOR
s='[project on tumor]';
elseif projectionType==PROJ_ON_NORMAL
s='[project on normal]';
end
end
function setLegend(projectionType)
PROJ_ON_TUMOR = 1;
PROJ_ON_NORMAL = 2;
if projectionType==PROJ_ON_TUMOR
legend('tumor','normal');
elseif projectionType==PROJ_ON_NORMAL
legend('tumor','normal');
end
end
function projMat=getProjectionMatrix(...
x,...
xraw,...
k,...
projMat)
nModes=size(projMat,2);
nSamples = size(x,2);
gridSize = size(x,1);
theta = zeros(nSamples);
for i=1:nSamples
vi = x(:,i);
for j=i:nSamples
vj=x(:,j);
theta(i,j) = vi'*vj/gridSize/nSamples;
theta(j,i) = theta(i,j);
end
end
[u,lam] = eig(theta);
[lam, ind] = sort(-diag(lam));
u = u(:,ind');
for i=1:nSamples
u(:,i) = u(:,i)/norm(u(:,1),1);
end
for i=1:nModes
phi(:,i)=zeros(gridSize,1);
for j=1:nSamples
phi(:,i)=phi(:,i)+u(j,i)*x(:,j);
end
phi(:,i)=phi(:,i)/norm(phi(:,i));
end
for j=1:nModes
sumP1(j)=0.;
for i=1:nSamples
sumP1(j)=sumP1(j)+sum(x(:,i)'*phi(:,j));
end
sumP1(j);
if sumP1(j) < 0
phi(:,j)=-phi(:,j);
end
for i=1:size(xraw,2)
projMat(i,j,k) = xraw(:,i)'*phi(:,j);
end
end
end
function nFig=analyzeProjectionMatrix(projMatA,...
npta,...
projectionType,...
nFig)
titlePrefix = getTitlePrefix(projectionType);
nModes = size(projMatA,2);
ntests = size(projMatA,3);
for j=1:nModes
projMat=squeeze(projMatA(:,j,:));
[nr,nc]=size(projMat);
outData(j).pltProjMat=projMat;
outData(j).pltProjMat(nr+1,:)=outData(j).pltProjMat(nr,:);
outData(j).pltProjMat(:,nc+1)=outData(j).pltProjMat(:,nc);
end
for j=1:nModes
nFig=nFig+1;
figure(nFig)
pcolor(outData(j).pltProjMat)
colorbar
xlabel('Trial Number')
set(gca,'XTick',10:10:100);
ylabel('Sample number')
title(sprintf('%s, Projections on Component number [%d]',titlePrefix,j))
end
for i=1:nModes
tumorMean(i,:) = mean(outData(i).pltProjMat(1:npta,:));
tumorStd(i,:) = std(outData(i).pltProjMat(1:npta,:));
normalMean(i,:)= mean(outData(i).pltProjMat((npta+1):size(outData(i).pltProjMat,1),:));
normalStd(i,:) = std(outData(i).pltProjMat((npta+1):size(outData(i).pltProjMat,1),:));
end
save chenLiverSave
k=1;
for j=1:ntests
caseNo=j;
znormal=sort((outData(k).pltProjMat((npta+1):size(outData(k).pltProjMat,1),...
caseNo)-mean(normalMean(k,:)))/mean(normalStd(k,:)));
ztumor=sort((outData(k).pltProjMat(1:npta,caseNo)-mean(tumorMean(k,:)))/...
mean(tumorStd(k,:)));
nmax=length(znormal);
for i=1:nmax
alphaLevel=i/nmax;
zalpha_normal(i)=norminv(alphaLevel,0,1);
end
nmax=length(ztumor);
for i=1:nmax
alphaLevel=i/nmax;
zalpha_tumor(i)=norminv(alphaLevel,0,1);
end
if 0
nFig=nFig+1;
figure(nFig)
subplot(2,1,1)
plot(zalpha_tumor,ztumor,'k*',zalpha_tumor,zalpha_tumor,...
'k:',zalpha_tumor,3+zalpha_tumor,'k:',zalpha_tumor,zalpha_tumor-3,'k:')
title(sprintf('%s, Chen Liver Cancer - Tumor Samples - Component %d',titlePrefix,k));
xlabel('Standard Normal Percentile')
ylabel('Normalized projection')
hold on
subplot(2,1,2)
plot(zalpha_normal,znormal,'k*',zalpha_normal,zalpha_normal,'k:',...
zalpha_normal,3+zalpha_normal,'k:',zalpha_normal,zalpha_normal-3,'k:')
title(sprintf('%s, Normal Samples',titlePrefix));
xlabel('Standard Normal Percentile')
ylabel('Normalized projection')
hold on
end
end
subplot(2,1,1)
hold off
subplot(2,1,2)
hold off
ctr=1:ntests;
k=1;
x=-80:0.1:80;
ftumor=1/sqrt(2*pi)/mean(tumorStd(k,:))*exp(-(x-mean(tumorMean(k,:))).^2/(2*mean(tumorStd(k,:))^2));
fnormal=1/sqrt(2*pi)/mean(normalStd(k,:))*exp(-(x-mean(normalMean(k,:))).^2/(2*mean(normalStd(k,:))^2));
nFig=nFig+1;
figure(nFig);
plot(x,ftumor,'r-',x,fnormal,'k-')
setLegend(projectionType);
title(sprintf('%s, Chen Liver Cancer Normal Distribution Curves - Component %d',titlePrefix,k));
xlabel('Projection Value')
ylabel('Normal Probability Density Function')
sumProjMat=0;
for j=1:nModes
sumProjMat=sumProjMat+outData(j).pltProjMat;
end
sumTumorMean=mean(sumProjMat(1:npta,:));
sumTumorStd=std(sumProjMat(1:npta,:));
sumNormalMean=mean(sumProjMat((npta+1):size(sumProjMat,1),:));
sumNormalStd=std(sumProjMat((npta+1):size(sumProjMat,1),:));
nFig=nFig+1;
figure(nFig);
for j=1:ntests
caseNo=j;
znormal=sort((sumProjMat((npta+1):size(sumProjMat,1),caseNo)-mean(sumNormalMean))/mean(sumNormalStd));
ztumor=sort((sumProjMat(1:npta,caseNo)-mean(sumTumorMean))/mean(sumTumorStd));
nmax=length(znormal);
for i=1:nmax
alphaLevel=i/nmax;
zalpha_normal(i)=norminv(alphaLevel,0,1);
end
nmax=length(ztumor);
for i=1:nmax
alphaLevel=i/nmax;
zalpha_tumor(i)=norminv(alphaLevel,0,1);
end
figure(nFig)
subplot(2,1,1)
plot(zalpha_tumor,ztumor,'k*',zalpha_tumor,zalpha_tumor,'k:',zalpha_tumor,3+zalpha_tumor,'k:',zalpha_tumor,zalpha_tumor-3,'k:')
title(sprintf('%s, Chen Liver Cancer - Tumor Samples - Sum of Projections onto Components 1,2,3',titlePrefix));
xlabel('Standard Normal Percentile')
ylabel('Normalized projection')
hold on
subplot(2,1,2)
plot(zalpha_normal,znormal,'k*',zalpha_normal,zalpha_normal,'k:',zalpha_normal,3+zalpha_normal,'k:',zalpha_normal,zalpha_normal-3,'k:')
title(sprintf('%s, Normal Samples',titlePrefix));
xlabel('Standard Normal Percentile')
ylabel('Normalized projection')
hold on
end
subplot(2,1,1)
hold off
subplot(2,1,2)
hold off
ftumor=1/sqrt(2*pi)/mean(sumTumorStd)*exp(-(x-mean(sumTumorMean)).^2/(2*mean(sumTumorStd)^2));
fnormal=1/sqrt(2*pi)/mean(sumNormalStd)*exp(-(x-mean(sumNormalMean)).^2/(2*mean(sumNormalStd)^2));
nFig=nFig+1;
figure(nFig)
plot(x,ftumor,'k-',x,fnormal,'r-')
setLegend(projectionType);
title(sprintf('%s, Chen Liver Cancer Normal Distribution Curves - Sum of Projections onto Components 1,2,3',titlePrefix));
xlabel('Projection Value')
ylabel('Normal Probability Density Function')
tMu=mean(tumorMean(1,:));
tSigma=mean(tumorStd(1,:));
nMu=mean(normalMean(1,:));
nSigma=mean(normalStd(1,:));
ftumor=normpdf(x,tMu,tSigma);
fnormal=normpdf(x,nMu,nSigma);
xp=-80:1:80;
ft=normpdf(xp,tMu,tSigma);
fn=normpdf(xp,nMu,nSigma);
fp=normpdf(xp,tMu+nMu,sqrt(tSigma^2+nSigma^2));
lrt=1-fn./((ft+fn));
nFig=nFig+1;
figure(nFig)
plot(x,ftumor,'k-',x,fnormal,'r-')
hold on
figure(nFig)
[AX,H1,H2] = plotyy(x,ftumor,xp,lrt,'plot');
plotyy(x,ftumor,xp,lrt)
xlabel('Projection')
set(get(AX(1),'Ylabel'),'String','Distribution Function')
set(get(AX(2),'Ylabel'),'String','Prob of Cancer')
grid on
set(AX(1),'YTick',0:.02:.1)
set(AX(2),'YTick',0:.2:1)
set(AX(1),'YColor',[0 0 0])
set(AX(2),'YColor',[0 0 0])
title(sprintf('%s, Chen Liver Cancer Normal Distribution Curves & Prob of Cancer',titlePrefix));
hold off
if 0
x=-100:.1:100;
mt=mean(tumorMean(1,:)); mn=mean(normalMean(1,:));
st=mean(tumorStd(1,:)); sn=mean(normalStd(1,:));
ft=normpdf(x,mt,st);
fn=normpdf(x,mn,sn);
pcancer=ft./(ft+fn);
figure(10)
plot(x,pcancer)
title(sprintf('%s, Chen Liver Cancer Study - Probability of Cancer',titlePrefix));
xlabel('Projection Value')
ylabel('Probability of Cancer')
end
nFig=nFig+1;
figure(nFig)
plot(x,ftumor,'k-',x,fnormal,'r-.')
title(sprintf('%s, Chen Liver Cancer Normal Distribution Curves',titlePrefix))
xlabel('Projection Value')
ylabel('Normal Distribution')
axis([-40 50 0 0.08])
setLegend(projectionType);
nFig=nFig+1;
figure(nFig)
if nFig==8 || nFig==16 || nFig==24
ptumor=1-fnormal./(ftumor+fnormal);
figure(8);
hold on;
if nFig==8
plot(x(501:1601),ptumor(501:1601),'r-')
elseif nFig==16
ptumor=1-fnormal./(ftumor+fnormal);
plot(x(501:1601),ptumor(501:1601),'b-')
else
plot(x(501:1601),ptumor(501:1601),'k-')
legend('Project on Tumor','Project on Normal','Project on Tumor-Normal');
end
title(sprintf('%s, Chen Liver Cancer - Probability of Cancer',titlePrefix));
xlabel('Projection Value')
ylabel('Probability of Cancer')
axis([-80 80 0 1])
end
xciMean=mean(outData(1).pltProjMat(1:npta,:));
xciStd=std(outData(1).pltProjMat(1:npta,:));
pltciLow=xciMean-xciStd*tinv(.025,npta-1)/sqrt(npta);
pltciHi=xciMean+xciStd*tinv(.025,npta-1)/sqrt(npta);
nFig=nFig+1;
figure(nFig)
xcinormalMean=mean(outData(1).pltProjMat((npta+1):size(outData(1).pltProjMat,1),:));
xcinormalStd=std(outData(1).pltProjMat((npta+1):size(outData(1).pltProjMat,1),:));
pltcinLow=xcinormalMean-xcinormalStd*tinv(.025,npta-1)/sqrt(npta);
pltcinHi=xcinormalMean+xcinormalStd*tinv(.025,npta-1)/sqrt(npta);
if projectionType ~=PROJ_ON_RATIO
plot(1:ntests,pltciLow(1:ntests),'r:',1:ntests,xciMean(1:ntests),'r-',1:ntests,pltciHi(1:ntests),'r:', ...
1:ntests,pltcinLow(1:ntests),'k:',1:ntests,xcinormalMean(1:ntests),'k-',1:ntests,pltcinHi(1:ntests),'k:')
xlabel('Case Number')
ylabel('95% confidence intervals')
title(sprintf('%s, 95 percent Confidence Intervals for Means',titlePrefix));
end
end