diff --git a/graph/scatterplot.m b/graph/scatterplot.m
index 496eeae..91d90cb 100644
--- a/graph/scatterplot.m
+++ b/graph/scatterplot.m
@@ -45,6 +45,7 @@
 % 'yaxisIncl'   : Points (min,max) on yaxis that need to be shown: YLim will
 %                 be larger of the same
 % Joern Diedrichsen (joern.diedrichsen@googlemail.com)
+% MK: added label options: labelcolor, labelsize, labelfont
 
 [Nx n] = size(y);
 if (n>1)
@@ -65,6 +66,8 @@
 polyorder=1;
 label=[];
 labelformat='%d';
+labelsize=10;
+labelfont='arial';
 wfun='bisquare';
 leg=[];
 r2=[];t=[];
@@ -80,7 +83,7 @@
 xaxisIncl=[];
 
 variables={'markertype','markercolor','markerfill','markersize','CAT',...
-    'subset','split','leg','leglocation','color','label',...
+    'subset','split','leg','leglocation','color','label','labelsize','labelfont',...
     'regression','polyorder','intercept',...
     'colormap',...
     'bubble','bubble_minsize','bubble_maxsize',...
@@ -120,6 +123,7 @@
 F.markertype=markertype;
 F.markercolor=markercolor;
 F.markerfill=markerfill;
+F.labelcolor=labelcolor; % MK added
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % re-code cell array
@@ -251,14 +255,16 @@
         xoffset=(xlims(2)-xlims(1))/40;
         yoffset=(ylims(2)-ylims(1))/40;
         for i=1:length(x)
-            text(x(i)+xoffset,y(i)+yoffset,label{i});
+            th=text(x(i)+xoffset,y(i)+yoffset,label{i});
+            set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added
         end;
     else
         label=label(find(subset));
         xoffset=(xlims(2)-xlims(1))/40;
         yoffset=(ylims(2)-ylims(1))/40;
         for i=1:length(x)
-            text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i)));
+            th=text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i)));
+            set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added
         end;
     end;
 end;
diff --git a/stats/RandIndex.m b/stats/RandIndex.m
new file mode 100644
index 0000000..3a3bc23
--- /dev/null
+++ b/stats/RandIndex.m
@@ -0,0 +1,62 @@
+function [AR,RI,MI,HI]=RandIndex(c1,c2)
+%RANDINDEX - calculates Rand Indices to compare two partitions
+% ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the
+% class membership, returns the "Hubert & Arabie adjusted Rand index".
+% [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index,
+% the unadjusted Rand index, "Mirkin's" index and "Hubert's" index.
+%
+% See L. Hubert and P. Arabie (1985) "Comparing Partitions" Journal of
+% Classification 2:193-218
+
+%(C) David Corney (2000)   D.Corney@cs.ucl.ac.uk
+% Maedbh King edited so that zeros and NaNs are ignored
+
+if nargin < 2 | min(size(c1)) > 1 | min(size(c2)) > 1
+   error('RandIndex: Requires two vector arguments')
+   return
+end
+
+C=Contingency(c1,c2);   %form contingency matrix
+
+n=sum(sum(C));
+nis=sum(sum(C,2).^2);   %sum of squares of sums of rows
+njs=sum(sum(C,1).^2);   %sum of squares of sums of columns
+
+t1=nchoosek(n,2);       %total number of pairs of entities
+t2=sum(sum(C.^2));      %sum over rows & columns of nij^2
+t3=.5*(nis+njs);
+
+%Expected index (for adjustment)
+nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1));
+
+A=t1+t2-t3;     %no. agreements
+D= -t2+t3;      %no. disagreements
+
+if t1==nc
+   AR=0;                %avoid division by zero; if k=1, define Rand = 0
+else
+   AR=(A-nc)/(t1-nc);   %adjusted Rand - Hubert & Arabie 1985
+end
+
+RI=A/t1;        %Rand 1971      %Probability of agreement
+MI=D/t1;        %Mirkin 1970    %p(disagreement)
+HI=(A-D)/t1;    %Hubert 1977    %p(agree)-p(disagree)
+
+function Cont=Contingency(Mem1,Mem2)
+
+if nargin < 2 | min(size(Mem1)) > 1 | min(size(Mem2)) > 1
+   error('Contingency: Requires two vector arguments')
+   return
+end
+
+Cont=zeros(max(Mem1),max(Mem2));
+
+Mem1(isnan(Mem1))=0; % MK add
+Mem2(isnan(Mem2))=0; % MK add
+for i = 1:length(Mem1);
+    if Mem1(i)==0 || Mem2(i)==0, % MK add
+        continue;   % skip entries coded as 0 or NaN
+    else
+        Cont(Mem1(i),Mem2(i))=Cont(Mem1(i),Mem2(i))+1;
+    end
+end
\ No newline at end of file
diff --git a/stats/semiNonNegMatFac.m b/stats/semiNonNegMatFac.m
index 4a8d7c7..d0ab794 100644
--- a/stats/semiNonNegMatFac.m
+++ b/stats/semiNonNegMatFac.m
@@ -1,17 +1,19 @@
-function [F,G,Err2]=semiNonNegMatFac(X,k,varargin);
+function [F,G,Info,winner]=semiNonNegMatFac(X,k,varargin);
 % Semi-nonnegative matrix factorization
 % as in Ding & Jordan
 % X: NxP Matrix of P observations to be clustered over N variables
 %    X = F *G'
 % k: number of clusters
 % Joern Diedrichsen
+% Maedbh King: added winner-take-all and R2 and R as output
+
 G0= [];            % Startibg value (otherwise uses Kmeans + 0.2)
 threshold = 0.01;  % Threshold on reconstruction error
 maxIter   = 5000;  % Maximal number of iterations
 normaliseF = 1;    % Normalise F to unit vectors afterwards?
 vararginoptions(varargin,{'G0','threshold','maxIter','normaliseF'});
 if (isempty(G0))
-    g=kmeans(X',k);
+    g=kmeans(X',k);
     G0 = indicatorMatrix('identity',g);
 end;
 df = inf;
@@ -48,4 +50,23 @@
 f=sqrt(sum(F.^2));   % Factor for normalisation
 F=bsxfun(@times,F,1./f);
 G=bsxfun(@times,G,f);       % Normalise the group factor the other way
-end;
\ No newline at end of file
+end;
+
+% Provide fitting info
+% R2
+SSR=nansum(R.*R);
+SST=nansum(X.*X);
+
+% R
+P=F*G';
+SXP=nansum(nansum(X.*P,1));
+SPP=nansum(nansum(P.*P));
+
+%Info.numiter = n-1;
+Info.error  = Err2(end);
+Info.R2_vox = 1-nansum(R.*R)./nansum(X.*X); % MK: R2=1-SSR/SST
+Info.R2     = 1-nansum(SSR)./nansum(SST);
+Info.R      = SXP./sqrt(nansum(SST).*SPP); % MK: R=covar(X,P)/sqrt(var(X)*var(P))
+
+% Do winner-take-all
+[~,winner]=max(G,[],2); % MK
\ No newline at end of file
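
For reference, a minimal usage sketch of the two routines touched by this patch; the data matrix Xdata and the reference parcellation refParcel are illustrative placeholders, not part of the toolbox:

    % Factorize an N x P data matrix into k = 10 cluster profiles and weights,
    % then read out the new fit statistics and the winner-take-all assignment.
    [F,G,Info,winner] = semiNonNegMatFac(Xdata,10,'maxIter',5000);
    fprintf('R2 = %1.3f, R = %1.3f\n',Info.R2,Info.R);

    % Compare the winner-take-all parcellation with a reference parcellation;
    % entries coded 0 or NaN are skipped by the edited Contingency helper.
    AR = RandIndex(winner,refParcel);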