www.pudn.com > ConstrainedEM.zip > approximateWeights.m


% approximate new weights - :

% uses computePartitionFunctionAndGradient ONCE to compute function
% and gradient. Then projects gradient the the subspace where
% sum(alphaGradient)==0, so as to keep the Alphas in the
% simplex. Uses lineSearch to search for a local maxima in the
% gradients 

function  [newAlphas]= approximateWeights(naiveWeights,calc_grad_num)
% naiveWeights - the  coefficients of the log(a(j)) in the function to be maximized
% and also the initial a(j)
% calc_grad_num - the number of full gradient calculations

global late_oracle;
global aa_flag;
global anti_chunk_num;

meaningfull_Gradient_saf=1e-10;	% gradient stop criteion ( not really important )
meaningful_move_saf=1e-6 ;
check_period=10;	% the number of iteration before aa_aprox check
aprox_range=0.8;	% the maximum size of a model which still allows aproximation to 
						% the anti chunklets partition function.
global exit_flags;
                  
%initial guess is current weights:
normalizer= sum(naiveWeights);
newAlphas= naiveWeights./normalizer;

% if late oracle , do gradient ascent in two stages : only chunklet pf in the first
% and full pf in the second. if not late oracle , only the second stage is needed.

full_iteration=0;		% iteration of full gradient calculation.
global stage;	% passed as global to calc_y
					% stage 1- only positive chunklet gradient ascent
					% stage 2 -full pf gradient ascent
               % the stage machinery s needed only when late oracle is used ,there
               % are anti chunklets and no aproximation to the negative pf is used .
if late_oracle & ~aa_flag & anti_chunk_num~=0
   stage=1;
else
   stage=2;
end

% if no late oracle is used and no anti chunklets are present, there
% is no need for gradient ascent.
if (late_oracle==0)&(anti_chunk_num==0)
  return;
end

while stage<3	% main gradient ascent loop
   
    % when in stage 2, number of iteration is limited by calc_grad_num 
    % (only if anti_chunks are involved and no aproximation is used )
   if stage==2;
      full_iteration=full_iteration+1;
      if (full_iteration>calc_grad_num)&(~aa_flag)&(anti_chunk_num~=0)
         break;
      end
   end
      
   %calculate pf and it's gradient ( with or without anti-chunklet consideration.
   if (stage==2)&(anti_chunk_num~=0)&(~aa_flag)
      'calculating gradient of pf'
   end
   [currZ, gradientZ , exp_const ]= calculate_partition_function2(newAlphas,0,stage);
   
   % if aa_flag is on , check the accuracy of aproximation to aa pf.
   % if not accurate, aa_flag is turned off (check is done once in check_period full iterations )
   if (stage==2)&(aa_flag)&(anti_chunk_num~=0)&(mod(full_iteration,check_period)==0)
      if max(newAlphas)>aprox_range
         aa_flag=0;
         disp(sprintf( 'canceling anti_chunklet aproximation usage\n'));
         % do this round again, without the aproximation using a generous number of gradient calculations
         exit_flags=bitor(exit_flags,2);
         newAlphas=approximateWeights(naiveWeights,calc_grad_num*5);	
         break;
      end
   end
            
   % calculate the Q-function value ( for debuging and stop ascent condition )
   currY=-log(currZ) + exp_const + naiveWeights*log(newAlphas)';
   
   % the Gradient of the Q-function to maximize. (signed as Y in this context )
   alphaGradient= -gradientZ./currZ + naiveWeights./newAlphas;
   
   % gradient projection - to remain in simplex!!!
   [gradientY]= computeGradientProjection(alphaGradient);
     
   % check for meaningfull gradient
   if (sum(abs(gradientY))