From 874c4d6029a9b8a6fab81644394a9719094ff8ad Mon Sep 17 00:00:00 2001 From: Maedbh King Date: Tue, 13 Feb 2018 18:08:24 -0800 Subject: [PATCH 1/5] Add files via upload I added the option to have a different number of iterations for the kmeans algorithm as the variable 'maxIter' wasn't included in the original code. --- stats/semiNonNegMatFac.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats/semiNonNegMatFac.m b/stats/semiNonNegMatFac.m index 4a8d7c7..22eeecc 100644 --- a/stats/semiNonNegMatFac.m +++ b/stats/semiNonNegMatFac.m @@ -11,7 +11,7 @@ normaliseF = 1; % Normalise F to unit vectors afterwards? vararginoptions(varargin,{'G0','threshold','maxIter','normaliseF'}); if (isempty(G0)) - g=kmeans(X',k); + g=kmeans(X',k,'MaxIter',maxIter); % MK added maxIner option here G0 = indicatorMatrix('identity',g); end; df = inf; From 340f13caa3cc2ad5cf45ac8293e6c4c9c7ab2c64 Mon Sep 17 00:00:00 2001 From: Maedbh King Date: Sat, 10 Mar 2018 14:37:01 -0800 Subject: [PATCH 2/5] Add files via upload Amended so that zeros are ignored. Right now it crashes if there are any zeros/nan values in the input vector. --- stats/RandIndex.m | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 stats/RandIndex.m diff --git a/stats/RandIndex.m b/stats/RandIndex.m new file mode 100644 index 0000000..3a3bc23 --- /dev/null +++ b/stats/RandIndex.m @@ -0,0 +1,62 @@ +function [AR,RI,MI,HI]=RandIndex(c1,c2) +%RANDINDEX - calculates Rand Indices to compare two partitions +% ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the +% class membership, returns the "Hubert & Arabie adjusted Rand index". +% [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index, +% the unadjusted Rand index, "Mirkin's" index and "Hubert's" index. +% +% See L. Hubert and P. Arabie (1985) "Comparing Partitions" Journal of +% Classification 2:193-218 + +%(C) David Corney (2000) D.Corney@cs.ucl.ac.uk +% Maedbh King edited so that zeros and NaNs are ignored + +if nargin < 2 | min(size(c1)) > 1 | min(size(c2)) > 1 + error('RandIndex: Requires two vector arguments') + return +end + +C=Contingency(c1,c2); %form contingency matrix + +n=sum(sum(C)); +nis=sum(sum(C,2).^2); %sum of squares of sums of rows +njs=sum(sum(C,1).^2); %sum of squares of sums of columns + +t1=nchoosek(n,2); %total number of pairs of entities +t2=sum(sum(C.^2)); %sum over rows & columnns of nij^2 +t3=.5*(nis+njs); + +%Expected index (for adjustment) +nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1)); + +A=t1+t2-t3; %no. agreements +D= -t2+t3; %no. disagreements + +if t1==nc + AR=0; %avoid division by zero; if k=1, define Rand = 0 +else + AR=(A-nc)/(t1-nc); %adjusted Rand - Hubert & Arabie 1985 +end + +RI=A/t1; %Rand 1971 %Probability of agreement +MI=D/t1; %Mirkin 1970 %p(disagreement) +HI=(A-D)/t1; %Hubert 1977 %p(agree)-p(disagree) + +function Cont=Contingency(Mem1,Mem2) + +if nargin < 2 | min(size(Mem1)) > 1 | min(size(Mem2)) > 1 + error('Contingency: Requires two vector arguments') + return +end + +Cont=zeros(max(Mem1),max(Mem2)); + +Mem1(isnan(Mem1))=0; % MK add +Mem2(isnan(Mem2))=0; % MK add +for i = 1:length(Mem1); + if Mem1(i)==0 || Mem2(i)==0, % MK add + i=i+1; + else + Cont(Mem1(i),Mem2(i))=Cont(Mem1(i),Mem2(i))+1; + end +end \ No newline at end of file From 6b4c7315039c6cd4cb89803a4370f254c11f9a7c Mon Sep 17 00:00:00 2001 From: Maedbh King Date: Tue, 27 Mar 2018 21:05:49 -0700 Subject: [PATCH 3/5] Add files via upload Now computes R2 and R for fit error. These are now fields in the 'Info' structure. --- stats/semiNonNegMatFac.m | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/stats/semiNonNegMatFac.m b/stats/semiNonNegMatFac.m index 22eeecc..d0ab794 100644 --- a/stats/semiNonNegMatFac.m +++ b/stats/semiNonNegMatFac.m @@ -1,17 +1,19 @@ -function [F,G,Err2]=semiNonNegMatFac(X,k,varargin); +function [F,G,Info,winner]=semiNonNegMatFac(X,k,varargin); % Semi-nonnegative matrix factorization % as in Ding & Jordan % X: NxP Matrix of P observations to be clustered over N variables % X = F *G' % k: number of clusters % Joern Diedrichsen +% Maedbh King: added winner-take-all and R2 and R as output + G0= []; % Startibg value (otherwise uses Kmeans + 0.2) threshold = 0.01; % Threshold on reconstruction error maxIter = 5000; % Maximal number of iterations normaliseF = 1; % Normalise F to unit vectors afterwards? vararginoptions(varargin,{'G0','threshold','maxIter','normaliseF'}); if (isempty(G0)) - g=kmeans(X',k,'MaxIter',maxIter); % MK added maxIner option here + g=kmeans(X',k); G0 = indicatorMatrix('identity',g); end; df = inf; @@ -48,4 +50,23 @@ f=sqrt(sum(F.^2)); % Factor for normalisation F=bsxfun(@times,F,1./f); G=bsxfun(@times,G,f); % Normalise the group factor the other way -end; \ No newline at end of file +end; + +% Provide fitting info +% R2 +SSR=nansum(R.*R); +SST=nansum(X.*X); + +% R +P=F*G'; +SXP=nansum(nansum(X.*P,1)); +SPP=nansum(nansum(P.*P)); + +%Info.numiter = n-1; +Info.error = Err2(end); +Info.R2_vox = 1-nansum(R.*R)./nansum(X.*X); % MK: R2=1-SSR/SST +Info.R2 = 1-nansum(SSR)./nansum(SST); +Info.R = SXP./sqrt(nansum(SST).*SPP); % MK: R=covar(X,P)/var(X)*var(P) + +% Do winner-take-all +[~,winner]=max(G,[],2); % MK \ No newline at end of file From 73b8c7f14d9d0447218f1e950197a10006e02b60 Mon Sep 17 00:00:00 2001 From: Maedbh King Date: Tue, 10 Apr 2018 16:48:20 -0700 Subject: [PATCH 4/5] Add files via upload Added label options: labelfont, labelcolor, labelsize --- graph/scatterplot.m | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/graph/scatterplot.m b/graph/scatterplot.m index 496eeae..3df57a8 100644 --- a/graph/scatterplot.m +++ b/graph/scatterplot.m @@ -45,6 +45,7 @@ % 'yaxisIncl' : Points (min,max) on yaxis that need to be shown: YLim will % be larger of the same % Joern Diedrichsen (joern.diedrichsen@googlemail.com) +% MK: added 'labelsize' and 'labelfont' options [Nx n] = size(y); if (n>1) @@ -65,6 +66,8 @@ polyorder=1; label=[]; labelformat='%d'; +labelsize=10; +labelfont='arial'; wfun='bisquare'; leg=[]; r2=[];t=[]; @@ -80,7 +83,7 @@ xaxisIncl=[]; variables={'markertype','markercolor','markerfill','markersize','CAT',... - 'subset','split','leg','leglocation','color','label',... + 'subset','split','leg','leglocation','color','label','labelsize','labelfont'... 'regression','polyorder','intercept',... 'colormap',... 'bubble','bubble_minsize','bubble_maxsize',... @@ -120,6 +123,7 @@ F.markertype=markertype; F.markercolor=markercolor; F.markerfill=markerfill; +F.labelcolor=labelcolor; % MK added %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % re-code cell array @@ -251,14 +255,16 @@ xoffset=(xlims(2)-xlims(1))/40; yoffset=(ylims(2)-ylims(1))/40; for i=1:length(x) - text(x(i)+xoffset,y(i)+yoffset,label{i}); + th=text(x(i)+xoffset,y(i)+yoffset,label{i}); + set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added end; else label=label(find(subset)); xoffset=(xlims(2)-xlims(1))/40; yoffset=(ylims(2)-ylims(1))/40; for i=1:length(x) - text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i))); + th=text(x(i)+xoffset,y(i)+yoffset,sprintf(labelformat,label(i))); + set(th,'Color',labelcolor{i},'FontSize',labelsize,'FontName',labelfont) % MK added end; end; end; From 185fc0d0520c397e06fbc135c18aa67320fb65d4 Mon Sep 17 00:00:00 2001 From: Maedbh King Date: Tue, 10 Apr 2018 16:51:04 -0700 Subject: [PATCH 5/5] Update scatterplot.m --- graph/scatterplot.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graph/scatterplot.m b/graph/scatterplot.m index 3df57a8..91d90cb 100644 --- a/graph/scatterplot.m +++ b/graph/scatterplot.m @@ -45,7 +45,7 @@ % 'yaxisIncl' : Points (min,max) on yaxis that need to be shown: YLim will % be larger of the same % Joern Diedrichsen (joern.diedrichsen@googlemail.com) -% MK: added 'labelsize' and 'labelfont' options +% MK: added label options: labelcolor, labelsize, labelfont [Nx n] = size(y); if (n>1)