% Source: www.pudn.com > training_gmm.rar > mykmeans.m


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
function [centres,post,options,errlog] = mykmeans(centres,data,options)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MYKMEANS  K-means clustering.
%
%   [CENTRES,POST,OPTIONS,ERRLOG] = MYKMEANS(CENTRES,DATA,OPTIONS)
%
%   Inputs
%     centres  Initial cluster centres, one centre per row. Only its row
%              count is used when OPTIONS(5) == 1 (random initialisation).
%     data     Data matrix, one sample per row.
%     options  Numeric option vector; it must have at least 14 elements
%              because OPTIONS(14) is read. Fields used here:
%                options(1)   > 0 prints the error after every cycle;
%                             >= 0 prints a message when the iteration
%                             limit is reached without converging.
%                options(2)   Tolerance on the largest absolute change of
%                             any centre coordinate between cycles.
%                options(3)   Tolerance on the absolute change of the
%                             error between cycles.
%                options(5)   == 1 replaces CENTRES by randomly chosen
%                             rows of DATA before iterating.
%                options(14)  Maximum number of cycles (0 selects 100).
%
%   Outputs
%     centres  Final cluster centres.
%     post     Indicator matrix: post(i,k) == 1 means the i-th row of DATA
%              was assigned to the k-th cluster in the last cycle (the
%              assignment made before that cycle's centre update).
%     options  Input options with options(8) set to the final error.
%     errlog   1-by-niters vector of the error at each cycle. It is only
%              filled when requested; if the function converges early the
%              entries after the last executed cycle remain zero.
%
%   Both tolerances must be satisfied at the same time for the iteration
%   to stop early. Relies on the external function DIST2.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

ndata = size(data,1);
ncentres = size(centres,1);

% options(14) == 0 selects the default iteration limit.
if (options(14))
    niters = options(14);
else
    niters = 100;
end

% Record the per-cycle error only when the caller asks for errlog.
store = (nargout > 3);
if store
    errlog = zeros(1,niters);
end

if (options(5) == 1)
    % Random initialisation: use ncentres distinct rows of data as the
    % starting centres. That is impossible with fewer rows than centres,
    % so fail with a clear message instead of an index-out-of-range error.
    if ncentres > ndata
        error('mykmeans:tooFewPoints', ...
            'Cannot draw %d initial centres from only %d data points.', ...
            ncentres, ndata);
    end
    perm = randperm(ndata);                 % random ordering of 1..ndata
    centres = data(perm(1:ncentres), :);
end

% Without at least one centre there is nothing to assign points to, and
% the scalar convergence test below would receive empty operands.
if isempty(centres)
    error('mykmeans:emptyCentres', ...
        'At least one cluster centre with non-zero dimension is required.');
end

id = eye(ncentres);     % row k is the indicator vector of cluster k
for n = 1:niters
    old_centres = centres;

    % Distance from every sample to every centre; the code expects an
    % ndata-by-ncentres matrix (presumably squared Euclidean distances --
    % confirm against dist2).
    d2 = dist2(data,centres);
    [minvals,index] = min(d2,[],2);

    post = id(index,:);             % assign each sample to its nearest centre
    num_points = sum(post, 1);      % number of samples in each cluster

    % Move every non-empty cluster's centre to the mean of its members.
    % Empty clusters keep their previous centre.
    for j = 1:ncentres
        if (num_points(j) > 0)
            centres(j,:) = sum(data(index == j,:),1)/num_points(j);
        end
    end

    % Error measure: mean over samples of the distance value to the
    % nearest centre (measured against the centres before this update).
    e = mean(minvals);
    if store
        errlog(n) = e;
    end
    if options(1) > 0
        fprintf(1,'Cycle %4d  Error %11.6f\n',n,e);
    end

    if n > 1
        % Termination test: centres and error must both have settled.
        centre_shift = max(abs(centres(:) - old_centres(:)));
        if centre_shift < options(2) && abs(old_e - e) < options(3)
            options(8) = e;
            return;
        end
    end
    old_e = e;
end

% Iteration limit reached without meeting the termination test.
options(8) = e;
if (options(1) >= 0)
    disp('Maximum number of iterations has been exceeded');
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%