% Source: www.pudn.com > DHMM_MATLAB.rar > My_Kmean.m
% by lkk@mails.tsinghua.edu.cn
function [center, data_classify] = My_Kmean(data,center)
% MY_KMEAN  Iterative K-means clustering.
%
% Inputs:
%   data   - samples to cluster, one sample per column (the original note
%            describes them as n points in the plane)
%   center - initial cluster centers, one center per column
% Outputs:
%   center        - final cluster centers
%   data_classify - 1-by-n vector; entry i is the cluster index of sample i
%
% The centers are refined by repeated calls to K_classify until the
% relative decrease of the total within-cluster scatter drops below esp.
esp = 0.001;    % convergence threshold on the relative scatter change

% Initial pass: supplies the reference scatter for the first comparison.
[center, covariance, data_classify] = K_classify(data,center);
old_covariance = covariance;

while true
    % center        - updated cluster centers
    % covariance    - within-cluster scatter of every cluster
    % data_classify - cluster index of every sample
    [center, covariance, data_classify] = K_classify(data,center);

    old_total = sum(old_covariance);
    if old_total > 0
        change_rate = sum(old_covariance - covariance) / old_total;
    else
        % A zero (or NaN) reference scatter would make the ratio NaN, and
        % NaN < esp is never true, so the loop would never terminate.
        % Nothing is left to improve in that case: treat it as converged.
        change_rate = 0;
    end
    % %g because the rate is fractional; %d is meant for integer values.
    fprintf(' esp = %g \n', change_rate);

    % Stop once the relative change of the scatter is below the threshold.
    if change_rate < esp
        break;
    end
    old_covariance = covariance;
end
end
function [new_center, covariance, data_classify] = K_classify(data,center)
% K_CLASSIFY  One nearest-center assignment and center update step.
%
% Inputs:
%   data   - samples to cluster, one sample per column
%   center - current cluster centers, one center per column
% Outputs:
%   new_center    - updated centers (mean of the samples in each cluster)
%   covariance    - 1-by-k vector with the scatter of each cluster as
%                   computed by K_covariance
%   data_classify - 1-by-n vector; entry i is the cluster index of sample i
%
% If a cluster receives no sample, the center of the most populated cluster
% is split with center_split and the assignment is repeated until no
% cluster is empty.
sort_number = size(center,2);
data_number = size(data,2);
data_classify = zeros(1,data_number);

% With fewer samples than centers at least one cluster is always empty, so
% the retry loop below could never finish.
if data_number < sort_number
    error('K_classify:tooFewSamples', ...
        'Cannot fill %d clusters with only %d samples.', ...
        sort_number, data_number);
end

% Assignment loop, repeated until no cluster is empty.
while true
    % The counts must restart from zero on every pass. Carrying them over
    % from the previous pass hides clusters that have just become empty and
    % lets mean() of an empty member set write NaN into the center.
    part_number = zeros(1,sort_number);

    for i = 1:data_number
        % Find the closest center for sample i.
        dmin = Inf;
        nearest = 0;
        for j = 1:sort_number
            d = dist(data(:,i), center(:,j));
            if d < dmin
                dmin = d;
                nearest = j;
            end
        end
        if nearest == 0
            % Every comparison failed (e.g. NaN distances); report it
            % instead of failing on a zero index below.
            error('K_classify:noNearestCenter', ...
                'No nearest center found for sample %d (check for NaN values).', i);
        end
        % Assign the sample and count it for its cluster.
        data_classify(i) = nearest;
        part_number(nearest) = part_number(nearest) + 1;
    end

    % Flag the clusters that received no sample.
    empty_sort = (part_number == 0);

    % Recompute center and scatter of every non-empty cluster.
    covariance = zeros(1,sort_number);
    for j = 1:sort_number
        if part_number(j) > 0
            members = data(:, data_classify == j);
            center(:,j) = mean(members,2);
            covariance(j) = K_covariance(members);
        end
    end

    % Done as soon as every cluster owns at least one sample.
    if ~any(empty_sort)
        new_center = center;
        break;
    end

    % Replace the first empty cluster: split the center of the largest
    % cluster and use the two results for the empty and the largest one.
    % NOTE(review): center_split is defined outside this view; it is used
    % here as returning the two new centers as columns 1 and 2 - confirm.
    idxempty = find(empty_sort);
    [~, idxm] = max(part_number);
    spc = center_split(center(:,idxm));
    center(:,idxempty(1)) = spc(:,1);
    center(:,idxm) = spc(:,2);
end % while
end
function [d] = dist(v1,v2)
% DIST  Sum of squared element-wise differences between v1 and v2
% (the squared Euclidean distance for real vectors; no square root taken).
delta = v1 - v2;
d = sum(delta .* delta);
end
function [d] = K_covariance(data)
% K_COVARIANCE  Scatter of a set of column vectors around their mean.
%
% Input:
%   data - samples, one per column
% Output:
%   d    - sum over all columns of the squared distance between the column
%          and the column-wise mean of data
centroid = mean(data,2);
sample_count = size(data,2);
d = 0;
for col = 1:sample_count
    offset = data(:,col) - centroid;
    d = d + sum(offset .* offset);
end
end