% Source: www.pudn.com > training_gmm.rar > mykmeans.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [centres,post,options,errlog] = mykmeans(centres,data,options)
%MYKMEANS K-means clustering.
%   [CENTRES,POST,OPTIONS,ERRLOG] = MYKMEANS(CENTRES,DATA,OPTIONS)
%
%   Inputs
%     CENTRES  ncentres-by-dim matrix of initial cluster centres. When
%              OPTIONS(5) == 1 only its row count is used, because the
%              centres are re-drawn at random from the rows of DATA.
%     DATA     ndata-by-dim matrix, one sample per row.
%     OPTIONS  control vector; the elements read or written here are:
%                OPTIONS(1)  > 0 prints the error after every cycle;
%                            >= 0 prints a notice when the iteration
%                            limit is reached without convergence.
%                OPTIONS(2)  tolerance on the largest absolute change of
%                            any centre coordinate between two cycles.
%                OPTIONS(3)  tolerance on the absolute change of the
%                            error between two cycles.
%                OPTIONS(5)  1 => initialise centres from random samples.
%                OPTIONS(8)  (output) error value of the last cycle run.
%                OPTIONS(14) maximum number of cycles; 0 means 100.
%
%   Outputs
%     CENTRES  final cluster centres.
%     POST     ndata-by-ncentres indicator matrix; POST(i,j) == 1 means
%              that sample i was assigned to cluster j. It holds the
%              assignment made at the start of the last cycle, i.e. with
%              respect to the centres before their final update.
%     OPTIONS  the input vector with OPTIONS(8) filled in.
%     ERRLOG   1-by-niters vector with the error of each cycle (only
%              built when requested). Entries for cycles that were not
%              run because of early convergence remain zero.
%
%   The error of a cycle is the mean over all samples of the value
%   returned by DIST2 for the nearest centre (presumably the squared
%   Euclidean distance -- DIST2 is defined outside this file).

[ndata,data_dim] = size(data);
[ncentres,centre_dim] = size(centres);

if ncentres < 1
    error('mykmeans:noCentres', ...
        'At least one cluster centre is required.');
end

% Iteration limit: a zero entry selects the default of 100 cycles.
if (options(14))
    niters = options(14);
else
    niters = 100;
end

% Only keep a per-cycle error log when the caller asks for it.
store = (nargout > 3);
if store
    errlog = zeros(1,niters);
end

if (options(5) == 1)
    % Draw ncentres distinct samples at random as the initial centres.
    if ncentres > ndata
        error('mykmeans:tooFewSamples', ...
            'Cannot draw %d initial centres from only %d samples.', ...
            ncentres, ndata);
    end
    perm = randperm(ndata);
    centres = data(perm(1:ncentres), :);
elseif centre_dim ~= data_dim
    % The supplied centres are used as they are, so they must live in
    % the same space as the data.
    error('mykmeans:dimMismatch', ...
        'Centres have %d columns but data has %d columns.', ...
        centre_dim, data_dim);
end

% Rows of the identity matrix act as one-hot membership vectors.
id = eye(ncentres);

for n = 1:niters
    old_centres = centres;

    % Distance from every sample to every centre (ndata-by-ncentres),
    % then assign each sample to its nearest centre.
    d2 = dist2(data,centres);
    [minvals,index] = min(d2,[],2);
    post = id(index,:);

    % Number of samples currently assigned to each cluster.
    num_points = sum(post, 1);

    % Move every non-empty cluster centre to the mean of its members;
    % an empty cluster keeps its previous centre.
    for j = 1:ncentres
        if (num_points(j) > 0)
            members = (index == j);
            centres(j,:) = sum(data(members,:),1)/num_points(j);
        end
    end

    e = mean(minvals);
    if store
        errlog(n) = e;
    end
    if options(1) > 0
        fprintf(1,'Cycle %4d Error %11.6f\n',n,e);
    end

    if n > 1
        % Stop once both the centres and the error have stabilised.
        centre_shift = max(max(abs(centres - old_centres)));
        if centre_shift < options(2) && abs(old_e - e) < options(3)
            options(8) = e;
            return;
        end
    end
    old_e = e;
end

% The iteration limit was reached without meeting the stopping test.
options(8) = e;
if (options(1) >= 0)
    disp('Maximum number of iterations has been exceeded');
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%