% www.pudn.com > ConstrainedEM.zip > calc_p_and_ll2.m


% calculate the probabilities p( data(i,:) belongs to center j). 
% to be used in EM 
function [ probabilities , ll , c_probabilities ]=...
	calc_p_and_ll2(data,param,scmf,chunks,ch_num,nc_inds)
% CALC_P_AND_LL2  Per-component Gaussian likelihoods and the log likelihood
% of a mixture model in which some points are grouped into chunklets.
%
% Inputs:
%   data    - n-by-d matrix, one data point per row.
%   param   - k-by-3 cell array; row j holds
%             { mean (1-by-d) , covariance (d-by-d) , prior weight }.
%   scmf    - single cov mat flag. When it equals 1, param{1,2} is used as
%             the covariance of every component.
%   chunks  - the chunklet labels; points with chunks==j form chunklet j.
%   ch_num  - the number of chunklets.
%   nc_inds - the indexes of points that aren't in any chunklet.
%
% Outputs:
%   probabilities   - n-by-k unnormalized probabilities p( xi | cj ).
%                     Rows listed in nc_inds are additionally multiplied by
%                     the prior weight of component j.
%   ll              - the log likelihood.
%   c_probabilities - ch_num-by-k unnormalized hidden chunklet variable
%                     probabilities. Row j is rescaled by
%                     10^(-min_exp*length(inds)) to limit underflow.
%
% If some non-chunklet point has zero probability under every component,
% its row in probabilities is set to 1, ll is set to -inf and the function
% returns immediately (c_probabilities is then all zeros).

k=size(param,1);
n=size(data,1);
d=size(data,2);
probabilities=zeros(n,k);
c_probabilities=zeros(ch_num,k);

for j=1:k
    if scmf==1
        cov_index=1;
    else
        cov_index=j;
    end
    cov_mat=param{cov_index,2};

    centered=data-ones(n,1)*param{j,1}; % subtract the mean
    % Row-wise quadratic form x*inv(S)*x'. Right division solves the linear
    % system directly instead of forming the explicit inverse.
    expContent=-0.5*sum((centered/cov_mat).*centered,2);
    probabilities(:,j)=1/( (2*pi)^(d/2)*det(cov_mat)^0.5 )*exp(expContent);    % p( x | center j )
    % Only the non-chunklet points get the prior here; for chunklets the
    % prior is applied once per chunklet further below.
    probabilities(nc_inds,j)=probabilities(nc_inds,j)*param{j,3};
end

% Log likelihood of the points that are not in any chunklet.
% The dimension is given explicitly: with a single component (k==1) a sum
% over the transposed matrix would collapse all points into one scalar.
tmp=sum(probabilities(nc_inds,:),2);
inds2=find(tmp==0);
if ~isempty(inds2)
    probabilities(nc_inds(inds2),:)=1;
    ll=-inf;
    return;
end
ll=sum(log(tmp));

if ch_num~=0    % calculate the ll component of the chunkletted data
    priors=cell2mat(param(:,3))';   % 1-by-k row of prior weights (loop invariant)
    for j=1:ch_num
        inds=find(chunks==j);
        if isempty(inds)
            % No points carry this label: nothing to rescale. The product
            % over zero rows is 1, so the row reduces to the priors.
            min_exp=0;
        else
            % Numeric trick: divide out the largest probability (as a power
            % of 10) before taking the product, then add it back to the ll
            % in additive form. The exponent is clamped at -307 so that
            % 10^(-min_exp) stays finite even when every probability in the
            % chunklet has underflowed to zero (log10(0) = -Inf).
            chunk_p=probabilities(inds,:);
            min_exp=max(log10(max(chunk_p(:))),-307);
        end
        scaled=probabilities(inds,:)* ( 10^(-min_exp) );

        c_probabilities(j,:)=prod(scaled,1).*priors;
        ll=ll+log(sum(c_probabilities(j,:)))+length(inds)*min_exp*log(10);
    end
end