www.pudn.com > Classification_toolbox.part01.rar > AGHC.m


function [patterns, targets] = AGHC(train_patterns, train_targets, params, plot_on) 
 
%Reduce the number of data points using the agglomerative hierarchical clustering algorithm 
%Inputs: 
%	train_patterns	- Input patterns 
%	train_targets	- Input targets 
%	params			- Parameters: [Number of output data points, distance type] 
%						  Distance can be min, max, avg, or mean 
%   plot_on         - Plot stages of the algorithm 
% 
%Outputs 
%	patterns		- New patterns 
%	targets			- New targets 
 
if (nargin < 4), 
    plot_on = 0; 
end 
 
[c, method] = process_params(params); 
[D,c_hat]	= size(train_patterns); 
label       = 1:c_hat; 
n           = ones(1,c_hat); 
 
%Compute distances 
N           = size(train_patterns,2); 
temp        = repmat(train_patterns,[1 1 N]); 
dist        = sqrt(squeeze(sum((temp - permute(temp, [1 3 2])).^2))); 
 
while (c_hat > c), 
    Uc       = unique(label); 
    Nc       = length(Uc); 
    new_dist = zeros(Nc); 
 
    switch method 
    case 'min' 
        %Find minimum distance between vectors from different clusters 
         
        %For each two clusters, find the shortest distance between vectors 
        for i = 1:Nc, 
            i_in = find(label == Uc(i)); 
            for j = 1:Nc, 
                j_in = find(label == Uc(j)); 
                new_dist(i,j) = min(min(dist(i_in,j_in))); 
            end 
        end 
        new_dist    = new_dist + eye(Nc)*1e33; 
        [i,j]   = find(new_dist == min(min(new_dist))); 
        i = Uc(i(1)); j = Uc(j(1)); 
    case 'max' 
        %Find maximum distance between vectors from different clusters 
         
        %For each two clusters, find the longest distance between vectors 
        for i = 1:Nc, 
            i_in = find(label == Uc(i)); 
            for j = 1:Nc, 
                j_in = find(label == Uc(j)); 
                new_dist(i,j) = max(max(dist(i_in,j_in))); 
            end 
        end 
        new_dist = new_dist .* (ones(Nc)-eye(Nc)); 
        [i,j]   = find(new_dist == max(max(new_dist))); 
        i = Uc(i(1)); j = Uc(j(1)); 
         
    case 'avg' 
        %Find average distance between vectors from different clusters 
         
        %For each two clusters, find the average distance between vectors in one cluster to each vector in the other cluster 
        for i = 1:Nc, 
            i_in = find(label == Uc(i)); 
            for j = 1:Nc, 
                j_in = find(label == Uc(j)); 
                new_dist(i,j) = mean(mean(dist(i_in,j_in)))/(length(j_in)*length(i_in)); 
            end 
        end 
        new_dist = new_dist .* (ones(Nc)-eye(Nc)); 
        [i,j]   = find(new_dist == max(max(new_dist))); 
        i = Uc(i(1)); j = Uc(j(1)); 
         
    case 'mean' 
        %Find mean distance between cluster centers  
         
        %For each two clusters, find the average distance between vectors in one cluster to each vector in the other cluster 
        for i = 1:Nc, 
            i_in = find(label == Uc(i)); 
            for j = 1:Nc, 
                j_in = find(label == Uc(j)); 
                new_dist(i,j) = sum((mean(train_patterns(:,i_in)')'-mean(train_patterns(:,j_in)')').^2); 
            end 
        end 
        new_dist    = new_dist + eye(Nc)*1e33; 
        [i,j]   = find(new_dist == min(min(new_dist))); 
        i = Uc(i(1)); j = Uc(j(1)); 
    otherwise 
        error('Distance method unknown') 
    end 
       
    %Merge cluster i with cluster j 
    label(find(label == j)) = i; 
     
    c_hat = c_hat - 1; 
     
    %Computer cluster centers 
    Uc       = unique(label); 
    Nc       = length(Uc); 
    patterns = zeros(D,Nc); 
    for i = 1:Nc, 
        in            = find(label == Uc(i)); 
        if (length(in) == 1) 
            patterns(:,i) = train_patterns(:,in); 
        else 
            patterns(:,i) = mean(train_patterns(:,in)')'; 
        end 
    end 
     
    %Plot the centers during the process  
    plot_process(patterns, plot_on) 
     
end 
  
%Label the data 
targets = zeros(1,c); 
Uc      = unique(label); 
Ut      = unique(train_targets); 
targets = zeros(1,c); 
for i = 1:c, 
    indices    = find(label == Uc(i)); 
    N          = hist(train_targets(:,indices), Ut); 
    [m, max_l] = max(N); 
    targets(i) = Ut(max_l); 
    if (length(indices) == 1) 
        patterns(:,i) = train_patterns(:,indices); 
    else 
        patterns(:,i) = mean(train_patterns(:,indices)')'; 
    end 
end