% Source: www.pudn.com > DHMM_MATLAB.rar > My_Kmean.m


%	by lkk@mails.tsinghua.edu.cn 
function [center, data_classify] = My_Kmean(data,center)
% MY_KMEAN  K-means clustering starting from the supplied class centers.
%
%   Inputs:
%       data    data to classify, one point per column
%       center  initial class centers, one center per column
%   Outputs:
%       center         final class centers
%       data_classify  1-by-n vector holding the class index of every point
%
%   K_classify is applied repeatedly; iteration stops once the relative
%   decrease of the total within-class scatter drops below esp.

    esp = 0.001;    % threshold on the relative change of the total scatter

    % First pass: yields updated centers, per-class scatter and the labels.
    [center, covariance, data_classify] = K_classify(data,center);
    old_covariance = covariance;

    while true
        [center, covariance, data_classify] = K_classify(data,center);

        old_total = sum(old_covariance);
        change = sum(old_covariance - covariance);

        % A zero total scatter would give 0/0 = NaN, and NaN < esp is false,
        % so the loop would never terminate. Treat a total that is not
        % strictly positive (this also catches NaN) as "no further change".
        if old_total > 0
            ratio = change / old_total;
        else
            ratio = 0;
        end

        fprintf('		esp = %d \n', ratio);

        % Stop when the relative scatter change is below the threshold.
        if ratio < esp
            break;
        end
        old_covariance = covariance;
    end
end
 
 
 
function [new_center, covariance, data_classify] = K_classify(data,center)
% K_CLASSIFY  Assign every point to its nearest center and recompute centers.
%
%   Inputs:
%       data    data to classify, one point per column
%       center  current class centers, one center per column
%   Outputs:
%       new_center     updated class centers (mean of the points of each class)
%       covariance     1-by-k vector with the scatter of each class as
%                      returned by K_covariance
%       data_classify  1-by-n vector holding the class index of every point
%
%   If a class receives no points, the center of the most populated class is
%   passed to center_split (defined outside this file); column 1 of its
%   result becomes the center of the first empty class, column 2 replaces the
%   center that was split, and the assignment pass is repeated.

    sort_number = size(center,2);   % number of classes
    data_number = size(data,2);     % number of points

    % With more classes than points at least one class is empty on every
    % pass, so the re-split loop below could never finish.
    if sort_number > data_number
        error('K_classify:tooManyClasses', ...
              'Number of classes (%d) exceeds number of data points (%d).', ...
              sort_number, data_number);
    end

    data_classify = zeros(1,data_number);

    % Repeat the assignment until no class is empty.
    while true
        % Counts must restart from zero on every pass; otherwise counts from
        % an earlier pass hide classes that have become empty and distort the
        % choice of the class to split.
        part_number = zeros(1,sort_number);

        for i = 1:data_number
            dmin = Inf;
            nearest = 0;
            % Find the center with the smallest distance (first one on ties).
            for j = 1:sort_number
                d = dist(data(:,i), center(:,j));
                if d < dmin
                    dmin = d;
                    nearest = j;
                end
            end
            data_classify(i) = nearest;
            part_number(nearest) = part_number(nearest) + 1;
        end

        % Flag the classes that received no point.
        empty_sort = (part_number == 0);

        % Recompute center and scatter of every non-empty class.
        covariance = zeros(1,sort_number);
        for j = 1:sort_number
            if part_number(j) > 0
                members = data(:, data_classify == j);
                center(:,j) = mean(members,2);
                covariance(j) = K_covariance(members);
            end
        end

        % Done as soon as every class owns at least one point.
        if ~any(empty_sort)
            new_center = center;
            break;
        end

        % Fill the first empty class by splitting the most populated one.
        idxempty = find(empty_sort);
        [max_count, idxm] = max(part_number); %#ok<ASGLU>
        spc = center_split(center(:,idxm));
        center(:,idxempty(1)) = spc(:,1);
        center(:,idxm) = spc(:,2);
    end
end
 
function [d] = dist(v1,v2)
% DIST  Squared Euclidean distance between two equally sized vectors.
%   d is the sum of the squared element-wise differences of v1 and v2
%   (no square root is taken).
    offset = v1 - v2;
    d = sum(offset .^ 2);
end
 
function [d] = K_covariance(data)
% K_COVARIANCE  Scatter of a set of points around their mean.
%   data holds one point per column. d is the sum, over all columns, of the
%   squared distance between the column and the mean column; it is 0 when
%   data has no columns.
    centroid = mean(data,2);
    n_points = size(data,2);
    d = 0;
    k = 1;
    % Accumulate column by column, in order.
    while k <= n_points
        offset = data(:,k) - centroid;
        d = d + sum(offset .^ 2);
        k = k + 1;
    end
end