% Source: www.pudn.com > ConstrainedEM.zip > calc_p_and_ll2.m
% calculate the probabilities p( data(i,:) belongs to center j).
% to be used in EM
function [ probabilities , ll , c_probabilities ]=...
    calc_p_and_ll2(data,param,scmf,chunks,ch_num,nc_inds)
% CALC_P_AND_LL2  Gaussian densities of every point under every center and
% the resulting log likelihood, for use inside a constrained EM iteration.
%
% Inputs:
%   data    - n-by-d matrix, one data point per row.
%   param   - k-by-3 cell array, one row per center:
%               param{j,1}  1-by-d mean (subtracted from every row of data),
%               param{j,2}  d-by-d covariance matrix,
%               param{j,3}  scalar factor multiplied into the densities
%                           (presumably the mixing weight of center j).
%   scmf    - single cov mat flag: when 1, param{1,2} is used for all centers.
%   chunks  - the chunklet labels; points with chunks==j form chunklet j.
%   ch_num  - the number of chunklets.
%   nc_inds - the indexes of points that aren't in any chunklet.
%
% Outputs:
%   probabilities   - n-by-k unnormalized probabilities p(xi | cj). Rows listed
%                     in nc_inds are additionally multiplied by param{j,3}.
%   ll              - the log likelihood. It is -inf (and the function returns
%                     early) when some non-chunklet point has zero total
%                     probability over all centers.
%   c_probabilities - ch_num-by-k unnormalized hidden chunklet variable
%                     probabilities. Each row is scaled by a per-chunklet
%                     constant (see the rescaling step below).

k=size(param,1);
n=size(data,1);
d=size(data,2);

probabilities=zeros(n,k);
c_probabilities=zeros(ch_num,k);

for j=1:k
    if scmf==1
        cov_index=1;
    else
        cov_index=j;
    end
    covMat=param{cov_index,2};

    tempData=data-ones(n,1)*param{j,1}; % subtract the mean
    % Mahalanobis term of the exponent; right division solves the linear
    % system instead of forming the explicit inverse.
    expContent=-0.5*sum((tempData/covMat).*tempData,2);
    probabilities(:,j)=1/( (2*pi)^(d/2)*det(covMat)^0.5 )*exp(expContent); % p( x | center j )
    probabilities(nc_inds,j)=probabilities(nc_inds,j)*param{j,3};
end

% Log likelihood of the points that are not in any chunklet.
% Sum explicitly along dimension 2 so that the result is one value per point
% even when there is a single center (k==1) or nc_inds is empty.
tmp=sum(probabilities(nc_inds,:),2);
inds2=find(tmp==0);
if ~isempty(inds2)
    % Some point has zero probability under every center: flag it and give up.
    probabilities(nc_inds(inds2),:)=1;
    ll=-inf;
    return;
end
ll=sum(log(tmp));

if ch_num~=0 % calculate the ll component of the chunkletted data
    weights=cell2mat(param(:,3))'; % 1-by-k row of the param{:,3} factors
    for j=1:ch_num
        inds=find(chunks==j);
        % Numeric trick to reduce underflow in the product over the chunklet:
        % divide every density by 10^min_exp, where min_exp is the largest
        % log10 density in the chunklet, and add the constant back to ll.
        % Zeros are replaced by 1e-323 only for the purpose of computing
        % min_exp, so that it stays finite.
        tmp=probabilities(inds,:);
        tmp(tmp==0)=1e-323;
        min_exp=max(max(log10(tmp)));
        % NOTE(review): if every density in the chunklet is exactly 0, min_exp
        % is about -323 and 10^(-min_exp) appears to overflow to Inf, making
        % 0*Inf = NaN here -- confirm how the caller wants that case handled.
        tmp=probabilities(inds,:)* ( 10^(-min_exp) );
        c_probabilities(j,:)=prod(tmp,1).*weights;
        ll=ll+log(sum(c_probabilities(j,:)))+length(inds)*min_exp*log(10);
    end
end