www.pudn.com > ConstrainedEM.zip > approximateWeights.m
% approximate new weights - :
% uses computePartitionFunctionAndGradient ONCE to compute function
% and gradient. Then projects gradient the the subspace where
% sum(alphaGradient)==0, so as to keep the Alphas in the
% simplex. Uses lineSearch to search for a local maxima in the
% gradients
function [newAlphas]= approximateWeights(naiveWeights,calc_grad_num)
% naiveWeights - the coefficients of the log(a(j)) in the function to be maximized
% and also the initial a(j)
% calc_grad_num - the number of full gradient calculations
global late_oracle;
global aa_flag;
global anti_chunk_num;
meaningfull_Gradient_saf=1e-10; % gradient stop criteion ( not really important )
meaningful_move_saf=1e-6 ;
check_period=10; % the number of iteration before aa_aprox check
aprox_range=0.8; % the maximum size of a model which still allows aproximation to
% the anti chunklets partition function.
global exit_flags;
%initial guess is current weights:
normalizer= sum(naiveWeights);
newAlphas= naiveWeights./normalizer;
% if late oracle , do gradient ascent in two stages : only chunklet pf in the first
% and full pf in the second. if not late oracle , only the second stage is needed.
full_iteration=0; % iteration of full gradient calculation.
global stage; % passed as global to calc_y
% stage 1- only positive chunklet gradient ascent
% stage 2 -full pf gradient ascent
% the stage machinery s needed only when late oracle is used ,there
% are anti chunklets and no aproximation to the negative pf is used .
if late_oracle & ~aa_flag & anti_chunk_num~=0
stage=1;
else
stage=2;
end
% if no late oracle is used and no anti chunklets are present, there
% is no need for gradient ascent.
if (late_oracle==0)&(anti_chunk_num==0)
return;
end
while stage<3 % main gradient ascent loop
% when in stage 2, number of iteration is limited by calc_grad_num
% (only if anti_chunks are involved and no aproximation is used )
if stage==2;
full_iteration=full_iteration+1;
if (full_iteration>calc_grad_num)&(~aa_flag)&(anti_chunk_num~=0)
break;
end
end
%calculate pf and it's gradient ( with or without anti-chunklet consideration.
if (stage==2)&(anti_chunk_num~=0)&(~aa_flag)
'calculating gradient of pf'
end
[currZ, gradientZ , exp_const ]= calculate_partition_function2(newAlphas,0,stage);
% if aa_flag is on , check the accuracy of aproximation to aa pf.
% if not accurate, aa_flag is turned off (check is done once in check_period full iterations )
if (stage==2)&(aa_flag)&(anti_chunk_num~=0)&(mod(full_iteration,check_period)==0)
if max(newAlphas)>aprox_range
aa_flag=0;
disp(sprintf( 'canceling anti_chunklet aproximation usage\n'));
% do this round again, without the aproximation using a generous number of gradient calculations
exit_flags=bitor(exit_flags,2);
newAlphas=approximateWeights(naiveWeights,calc_grad_num*5);
break;
end
end
% calculate the Q-function value ( for debuging and stop ascent condition )
currY=-log(currZ) + exp_const + naiveWeights*log(newAlphas)';
% the Gradient of the Q-function to maximize. (signed as Y in this context )
alphaGradient= -gradientZ./currZ + naiveWeights./newAlphas;
% gradient projection - to remain in simplex!!!
[gradientY]= computeGradientProjection(alphaGradient);
% check for meaningfull gradient
if (sum(abs(gradientY))