% Source: www.pudn.com > ConstrainedEM.zip > select_start_params.m, change:2003-05-24,size:3401b


%new version with wishart prior smoothing of covmat. 
% data - the data in rows. 
 
% param - cell of k*3 of GMM parameters. empty unless 
% dont_rand_covmat_flag=1 and single_cov_mat_flag=1 and then 
% param{1,2} hold covmat to be used. 
 
% single_cov_mat_flag  - if ==1 will only select a single covmat in param{1,2} 
 
% dont_rand_covmat_flag - if ==1 will not randomly select the cov mat,
% but use the supplied one (param{1,2}) - to be used only when single_cov_mat_flag is on.
 
% k - number of models 
% d - dim of data 
% n - number of data points 
 
%returns 
 
% param - the current randomly chosen params. 
 
function [param] = ...
    select_start_params(data,param,single_cov_mat_flag,dont_rand_covmat_flag,k,d,n,noise_param,Chunklets,diag_covmat_flag)
% SELECT_START_PARAMS  Randomly choose starting parameters for a k-model GMM.
%
%   For each of the k models a distinct data point is drawn as a seed, the
%   nearest points to that seed are collected, and the model centre
%   (param{j,1}) is set to the mean of that neighbourhood.  Unless
%   dont_rand_covmat_flag==1, the model covariance (param{j,2}) is set to
%   the sample covariance of the same neighbourhood.
%
%   Inputs
%     data                  - n-by-d matrix, one data point per row.
%     param                 - cell array of parameters.  Only param{1,2} is
%                             read, and only when dont_rand_covmat_flag==1
%                             (it is then the metric used for distances).
%     single_cov_mat_flag   - if 1 (and dont_rand_covmat_flag is 0) the k
%                             covariances are averaged into param{1,2} and
%                             param{2..k,2} are set to [].
%     dont_rand_covmat_flag - if 1, only the centres are chosen; distances
%                             are measured with param{1,2} and no
%                             covariance is written.
%     k, d, n               - number of models, data dimension, number of
%                             data points.
%     noise_param           - divides the neighbourhood size: the
%                             round(n/(k*noise_param)) nearest points are
%                             used (at least d+1 when covariances are
%                             estimated).
%     Chunklets             - optional (default []).  Indexed by data-point
%                             index; when non-empty, every point sharing a
%                             value with a neighbour is added to the
%                             neighbourhood.  The value -1 is excluded from
%                             this closure (presumably "no chunklet" -
%                             confirm against the caller).
%     diag_covmat_flag      - optional (default 0).  If true, off-diagonal
%                             covariance entries are zeroed.
%
%   Output
%     param - the cell array with param{j,1} (and possibly param{j,2})
%             filled in for j=1..k.

% 'var' restricts the lookup to workspace variables; without it a file or
% function of the same name on the path would make exist() return nonzero
% and leave the variable undefined.
if ~exist('Chunklets','var')
  Chunklets=[];
end

if ~exist('diag_covmat_flag','var')
  diag_covmat_flag=0;
end

% Each model needs its own distinct seed point.
if k > n
  error('select_start_params: cannot choose %d distinct seeds from %d data points.', k, n);
end

%use smoothing of covmat.
wishart_prior_flag=0;
T=eye(d);		% the prior covmat.
alpha=n/20;		% the prior 'virtual sample' size.

use_given_covmat = isequal(dont_rand_covmat_flag,1);

% Draw k distinct seed indices uniformly from 1..n.  (A rejection loop over
% floor(rand*(n-1))+1 can never select point n and never terminates when
% k >= n.)
perm=randperm(n);
seed_inds=perm(1:k);

for j=1:k
    seed=data(seed_inds(j),:);          % select data point as seed.
    diffs=data-ones(n,1)*seed;          % n-by-d offsets from the seed.

    if use_given_covmat
        % rand only centers: neighbourhood in the metric of the supplied
        % covmat, without a lower limit on the number of neighbours with
        % respect to d.  The explicit dimension keeps the result 1-by-n
        % even when d==1.
        dists=sum( diffs'.*(param{1,2}\diffs'), 1 );
        number_of_nei=round(n/(k*noise_param));
    else
        % rand centers and cov matrices: euclidean neighbourhood, with at
        % least d+1 neighbours.
        dists=sum( diffs.^2, 2 )';
        number_of_nei=max(round(n/(k*noise_param)),d+1);
    end

    % Keep the neighbour count inside 1..n so that the indexing below is
    % valid and the neighbourhood is never empty.
    number_of_nei=min(max(number_of_nei,1),n);

    [ tmp, inds ]=sort( dists );
    inds=inds(1:number_of_nei);

    if ~isempty(Chunklets)         % chunklet closure
        ch=setdiff(unique(Chunklets(inds)),-1);
        inds=union(inds,find(ismember(Chunklets,ch)));
    end

    neighbourhood=data(inds,:);

    % Average over rows explicitly: with a single neighbour, mean() without
    % a dimension would average across the coordinates instead.
    param{j,1}=mean(neighbourhood,1);

    if ~use_given_covmat
        covmat=cov(neighbourhood);
        if(diag_covmat_flag)
            covmat=covmat.*eye(size(covmat));   % keep the diagonal only.
        end

        %smooth cov mat (applied to the current model j):
        if wishart_prior_flag==1
            Sm=covmat.*n;
            covmat=(alpha*T+Sm)/(n+alpha);	% is this the map for a wishart distribution?
                                            % it needs to be determined.
        end

        param{j,2}=covmat;
    end
end

if isequal(single_cov_mat_flag,1) && isequal(dont_rand_covmat_flag,0)
    % unify the cov matrices ( mean them ) and put the result in param{1,2}
    tmp=zeros(d,d);
    for j=1:k
        tmp=tmp + param{j,2};
    end
    param{1,2}=tmp/k;
    %clear all other cov mats!!!!!
    for j=2:k
        param{j,2}= [];
    end
end