% www.pudn.com > starter.zip > minFunc.m, change:2011-01-04,size:43634b


function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin) 
% minFunc(funObj,x0,options,varargin) 
% 
% Unconstrained optimizer using a line search strategy 
% 
% Uses an interface very similar to fminunc 
%   (it doesn't support all of the optimization toolbox options, 
%       but supports many other options). 
% 
% It computes descent directions using one of ('Method'): 
%   - 'sd': Steepest Descent 
%       (no previous information used, not recommended) 
%   - 'csd': Cyclic Steepest Descent 
%       (uses previous step length for a fixed length cycle) 
%   - 'bb': Barzilai and Borwein Gradient 
%       (uses only previous step) 
%   - 'cg': Non-Linear Conjugate Gradient 
%       (uses only previous step and a vector beta) 
%   - 'scg': Scaled Non-Linear Conjugate Gradient 
%       (uses previous step and a vector beta,  
%           and Hessian-vector products to initialize line search) 
%   - 'pcg': Preconditioned Non-Linear Conjugate Gradient 
%       (uses only previous step and a vector beta, preconditioned version) 
%   - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating 
%       (default: uses a predetermined number of previous steps to form a  
%           low-rank Hessian approximation) 
%   - 'newton0': Hessian-Free Newton 
%       (numerically computes Hessian-Vector products) 
%   - 'pnewton0': Preconditioned Hessian-Free Newton  
%       (numerically computes Hessian-Vector products, preconditioned 
%       version) 
%   - 'qnewton': Quasi-Newton Hessian approximation 
%       (uses dense Hessian approximation) 
%   - 'mnewton': Newton's method with Hessian calculation after every 
%   user-specified number of iterations 
%       (needs user-supplied Hessian matrix) 
%   - 'newton': Newton's method with Hessian calculation every iteration 
%       (needs user-supplied Hessian matrix) 
%   - 'tensor': Tensor 
%       (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives) 
% 
% Several line search strategies are available for finding a step length satisfying 
%   the termination criteria ('LS'): 
%   - 0: Backtrack w/ Step Size Halving 
%   - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values 
%   - 2: Backtrack w/ Cubic Interpolation from new function + gradient 
%   values (default for 'bb' and 'sd') 
%   - 3: Bracketing w/ Step Size Doubling and Bisection 
%   - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function + 
%   gradient values (default for all except 'bb' and 'sd') 
%   - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation 
%   - 6: Use Matlab Optimization Toolbox's line search 
%           (requires Matlab's linesearch.m to be added to the path) 
% 
%   Above, the first three find a point satisfying the Armijo conditions, 
%   while the last four search for a point satisfying the Wolfe 
%   conditions.  If the objective function overflows, it is recommended 
%   to use one of the first 3. 
%   The first three can be used to perform a non-monotone 
%   linesearch by changing the option 'Fref'. 
% 
% Several strategies for choosing the initial step size are available ('LS_init'): 
%   - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd') 
%       (t = 1) 
%   - 1: Use a step similar to the previous step (default for 'cg' and 'sd') 
%       (t = t_old*min(2,g'd/g_old'd_old)) 
%   - 2: Quadratic Initialization using previous function value and new 
%   function value/gradient (use this if steps tend to be very long) 
%       (t = min(1,2*(f-f_old)/g)) 
%   - 3: The minimum between 1 and twice the previous step length 
%       (t = min(1,2*t)) 
%   - 4: The scaled conjugate gradient step length (may accelerate 
%   conjugate gradient methods, but requires a Hessian-vector product) 
%       (t = g'd/d'Hd) 
% 
% Inputs: 
%   funObj is a function handle 
%   x0 is a starting vector; 
%   options is a struct containing parameters 
%  (defaults are used for non-existent or blank fields) 
%   all other arguments are passed to funObj 
% 
% Outputs: 
%   x is the minimum value found 
%   f is the function value at the minimum found 
%   exitflag returns an exit condition 
%   output returns a structure with other information 
% 
% Supported Input Options 
%   Display - Level of display [ off | final | (iter) | full | excessive ] 
%   MaxFunEvals - Maximum number of function evaluations allowed (1000) 
%   MaxIter - Maximum number of iterations allowed (500) 
%   TolFun - Termination tolerance on the first-order optimality (1e-5) 
%   TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9) 
%   Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 | 
%       qnewton | mnewton | newton | tensor ] 
%   c1 - Sufficient Decrease for Armijo condition (1e-4) 
%   c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise) 
%   LS_init - Line Search Initialization -see above (2 for cg/sd, 4 for scg, 0 otherwise) 
%   LS - Line Search type -see above (2 for bb, 4 otherwise) 
%   Fref - Setting this to a positive integer greater than 1 
%       will use non-monotone Armijo objective in the line search. 
%       (20 for bb, 10 for csd, 1 for all others) 
%   numDiff - compute derivative numerically 
%       (default: 0) (this option has a different effect for 'newton', see below) 
%   useComplex - if 1, use complex differentials when computing numerical derivatives 
%       to get very accurate values (default: 0) 
%   DerivativeCheck - if 'on', computes derivatives numerically at initial 
%       point and compares to user-supplied derivative (default: 'off') 
%   outputFcn - function to run after each iteration (default: []).  It 
%       should have the following interface: 
%       outputFcn(x,infoStruct,state,varargin{:}) 
%   useMex - where applicable, use mex files to speed things up (default: 1) 
% 
% Method-specific input options: 
%   newton: 
%       HessianModify - type of Hessian modification for direct solvers to 
%       use if the Hessian is not positive definite (default: 0) 
%           0: Minimum Euclidean norm s.t. eigenvalues sufficiently large 
%           (requires eigenvalues on iterations where matrix is not pd) 
%           1: Start with (1/2)*||A||_F and increment until Cholesky succeeds 
%           (an approximation to method 0, does not require eigenvalues) 
%           2: Modified LDL factorization 
%           (only 1 generalized Cholesky factorization done and no eigenvalues required) 
%           3: Modified Spectral Decomposition 
%           (requires eigenvalues) 
%           4: Modified Symmetric Indefinite Factorization 
%           5: Uses the eigenvector of the smallest eigenvalue as negative 
%           curvature direction 
%       cgSolve - use conjugate gradient instead of direct solver (default: 0) 
%           0: Direct Solver 
%           1: Conjugate Gradient 
%           2: Conjugate Gradient with Diagonal Preconditioner 
%           3: Conjugate Gradient with LBFGS Preconditioner 
%           x: Conjugate Gradient with Symmetric Successive Over Relaxation 
%           Preconditioner with parameter x 
%               (where x is a real number in the range [0,2]) 
%           x: Conjugate Gradient with Incomplete Cholesky Preconditioner 
%           with drop tolerance -x 
%               (where x is a real negative number) 
%       numDiff - compute Hessian numerically 
%                 (default: 0, done with complex differentials if useComplex = 1) 
%       LS_saveHessiancomp - when on, only computes the Hessian at the 
%       first and last iteration of the line search (default: 1) 
%   mnewton: 
%       HessianIter - number of iterations to use same Hessian (default: 5) 
%   qnewton: 
%       initialHessType - scale initial Hessian approximation (default: 1) 
%       qnUpdate - type of quasi-Newton update (default: 3): 
%           0: BFGS 
%           1: SR1 (when it is positive-definite, otherwise BFGS) 
%           2: Hoshino 
%           3: Self-Scaling BFGS 
%           4: Oren's Self-Scaling Variable Metric method  
%           5: McCormick-Huang asymmetric update 
%       Damped - use damped BFGS update (default: 1) 
%   newton0/pnewton0: 
%       HvFunc - user-supplied function that returns Hessian-vector products 
%           (by default, these are computed numerically using autoHv) 
%           HvFunc should have the following interface: HvFunc(v,x,varargin{:}) 
%       useComplex - use a complex perturbation to get high accuracy 
%           Hessian-vector products (default: 0) 
%           (the increased accuracy can make the method much more efficient, 
%               but gradient code must properly support complex inputs) 
%       useNegCurv - a negative curvature direction is used as the descent 
%           direction if one is encountered during the cg iterations 
%           (default: 1) 
%       precFunc (for pnewton0 only) - user-supplied preconditioner 
%           (by default, an L-BFGS preconditioner is used) 
%           precFunc should have the following interface: 
%           precFunc(v,x,varargin{:}) 
%   lbfgs: 
%       Corr - number of corrections to store in memory (default: 100) 
%           (higher numbers converge faster but use more memory) 
%       Damped - use damped update (default: 0) 
%   pcg: 
%       cgUpdate - type of update (default: 2) 
%   cg/scg/pcg: 
%       cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1) 
%           0: Fletcher Reeves 
%           1: Polak-Ribiere 
%           2: Hestenes-Stiefel (not supported for pcg) 
%           3: Gilbert-Nocedal 
%       HvFunc (for scg only)- user-supplied function that returns Hessian-vector  
%           products 
%           (by default, these are computed numerically using autoHv) 
%           HvFunc should have the following interface: 
%           HvFunc(v,x,varargin{:}) 
%       precFunc (for pcg only) - user-supplied preconditioner 
%           (by default, an L-BFGS preconditioner is used) 
%           precFunc should have the following interface: 
%           precFunc(v,x,varargin{:}) 
%   bb: 
%       bbType - type of bb step (default: 1) 
%           0: min_alpha ||delta_x - alpha delta_g||_2 
%           1: min_alpha ||alpha delta_x - delta_g||_2 
%           2: Conic BB 
%           3: Gradient method with retards 
%   csd: 
%       cycle - length of cycle (default: 3) 
% 
% Supported Output Options 
%   iterations - number of iterations taken 
%   funcCount - number of function evaluations 
%   algorithm - algorithm used 
%   firstorderopt - first-order optimality 
%   message - exit message 
%   trace.funccount - function evaluations after each iteration 
%   trace.fval - function value after each iteration 
% 
% Author: Mark Schmidt (2006) 
% Web: http://www.cs.ubc.ca/~schmidtm 
% 
% Sources (in order of how much the source material contributes): 
%   J. Nocedal and S.J. Wright.  1999.  "Numerical Optimization".  Springer Verlag. 
%   R. Fletcher.  1987.  "Practical Methods of Optimization".  Wiley. 
%   J. Demmel.  1997.  "Applied Linear Algebra".  SIAM. 
%   R. Barret, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R. 
%   Pozo, C. Romine, and H. Van der Vorst.  1994.  "Templates for the Solution of 
%   Linear Systems: Building Blocks for Iterative Methods".  SIAM. 
%   J. More and D. Thuente.  "Line search algorithms with guaranteed 
%   sufficient decrease".  ACM Trans. Math. Softw. vol 20, 286-307, 1994. 
%   M. Raydan.  "The Barzilai and Borwein gradient method for the large 
%   scale unconstrained minimization problem".  SIAM J. Optim., 7, 26-33, 
%   (1997). 
%   "Mathematical Optimization".  The Computational Science Education 
%   Project.  1995. 
%   C. Kelley.  1999.  "Iterative Methods for Optimization".  Frontiers in 
%   Applied Mathematics.  SIAM. 
 
if nargin < 3 
    options = []; 
end 
 
% Get Parameters 
[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,... 
    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,... 
    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 
    DerivativeCheck,Damped,HvFunc,bbType,cycle,... 
    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ... 
    minFunc_processInputOptions(options); 
 
if isfield(options, 'logfile') 
    logfile = options.logfile; 
else 
    logfile = []; 
end 
 
% Constants 
SD = 0; 
CSD = 1; 
BB = 2; 
CG = 3; 
PCG = 4; 
LBFGS = 5; 
QNEWTON = 6; 
NEWTON0 = 7; 
NEWTON = 8; 
TENSOR = 9; 
 
% Initialize 
p = length(x0); 
d = zeros(p,1); 
x = x0; 
t = 1; 
 
% If necessary, form numerical differentiation functions 
funEvalMultiplier = 1; 
if numDiff && method ~= TENSOR 
    varargin(3:end+2) = varargin(1:end); 
    varargin{1} = useComplex; 
    varargin{2} = funObj; 
    if method ~= NEWTON 
        if debug 
            if useComplex 
                fprintf('Using complex differentials for gradient computation\n'); 
            else 
                fprintf('Using finite differences for gradient computation\n'); 
            end 
        end 
        funObj = @autoGrad; 
    else 
        if debug 
            if useComplex 
                fprintf('Using complex differentials for gradient computation\n'); 
            else 
                fprintf('Using finite differences for gradient computation\n'); 
            end 
        end 
        funObj = @autoHess; 
    end 
 
    if method == NEWTON0 && useComplex == 1 
        if debug 
            fprintf('Turning off the use of complex differentials\n'); 
        end 
        useComplex = 0; 
    end 
 
    if useComplex 
        funEvalMultiplier = p; 
    else 
        funEvalMultiplier = p+1; 
    end 
end 
 
% Evaluate Initial Point 
if method < NEWTON 
    [f,g] = feval(funObj, x, varargin{:}); 
else 
    [f,g,H] = feval(funObj, x, varargin{:}); 
    computeHessian = 1; 
end 
funEvals = 1; 
 
if strcmp(DerivativeCheck,'on') 
    if numDiff 
        fprintf('Can not do derivative checking when numDiff is 1\n'); 
    end 
    % Check provided gradient/hessian function using numerical derivatives 
    fprintf('Checking Gradient:\n'); 
    [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:}); 
 
    fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2))); 
    if max(abs(g-g2)) > 1e-4 
        fprintf('User NumDif:\n'); 
        [g g2] 
        diff = abs(g-g2) 
        pause; 
    end 
 
    if method >= NEWTON 
        fprintf('Check Hessian:\n'); 
        [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:}); 
 
        fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:)))); 
        if max(abs(H(:)-H2(:))) > 1e-4 
            H 
            H2 
            diff = abs(H-H2) 
            pause; 
        end 
    end 
end 
 
% Output Log 
if verboseI 
    fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 
end 
 
if logfile 
    fid = fopen(logfile, 'a'); 
    if (fid > 0) 
        fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 
        fclose(fid); 
    end 
end 
 
% Output Function 
if ~isempty(outputFcn) 
    callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:}); 
end 
 
% Initialize Trace 
trace.fval = f; 
trace.funcCount = funEvals; 
 
% Check optimality of initial point 
if sum(abs(g)) <= tolFun 
    exitflag=1; 
    msg = 'Optimality Condition below TolFun'; 
    if verbose 
        fprintf('%s\n',msg); 
    end 
    if nargout > 3 
        output = struct('iterations',0,'funcCount',1,... 
            'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 
    end 
    return; 
end 
 
% Perform up to a maximum of 'maxIter' descent steps: 
for i = 1:maxIter 
 
    % ****************** COMPUTE DESCENT DIRECTION ***************** 
 
    switch method 
        case SD % Steepest Descent 
            d = -g; 
 
        case CSD % Cyclic Steepest Descent 
 
            if mod(i,cycle) == 1 % Use Steepest Descent 
                alpha = 1; 
                LS_init = 2; 
                LS = 4; % Precise Line Search 
            elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step 
                alpha = t; 
                LS_init = 0; 
                LS = 2; % Non-monotonic line search 
            end 
            d = -alpha*g; 
 
        case BB % Steepest Descent with Barzilai and Borwein Step Length 
 
            if i == 1 
                d = -g; 
            else 
                y = g-g_old; 
                s = t*d; 
                if bbType == 0 
                    yy = y'*y; 
                    alpha = (s'*y)/(yy); 
                    if alpha <= 1e-10 || alpha > 1e10 
                        alpha = 1; 
                    end 
                elseif bbType == 1 
                    sy = s'*y; 
                    alpha = (s'*s)/sy; 
                    if alpha <= 1e-10 || alpha > 1e10 
                        alpha = 1; 
                    end 
                elseif bbType == 2 % Conic Interpolation ('Modified BB') 
                    sy = s'*y; 
                    ss = s'*s; 
                    alpha = ss/sy; 
                    if alpha <= 1e-10 || alpha > 1e10 
                        alpha = 1; 
                    end 
                    alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s); 
                    if alphaConic > .001*alpha && alphaConic < 1000*alpha 
                        alpha = alphaConic; 
                    end 
                elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step) 
                    sy = s'*y; 
                    alpha = (s'*s)/sy; 
                    if alpha <= 1e-10 || alpha > 1e10 
                        alpha = 1; 
                    end 
                    v(1+mod(i-2,5)) = alpha; 
                    alpha = v(ceil(rand*length(v))); 
                end 
                d = -alpha*g; 
            end 
            g_old = g; 
            myF_old = f; 
 
 
        case CG % Non-Linear Conjugate Gradient 
 
            if i == 1 
                d = -g; % Initially use steepest descent direction 
            else 
                gtgo = g'*g_old; 
                gotgo = g_old'*g_old; 
 
                if cgUpdate == 0 
                    % Fletcher-Reeves 
                    beta = (g'*g)/(gotgo); 
                elseif cgUpdate == 1 
                    % Polak-Ribiere 
                    beta = (g'*(g-g_old)) /(gotgo); 
                elseif cgUpdate == 2 
                    % Hestenes-Stiefel 
                    beta = (g'*(g-g_old))/((g-g_old)'*d); 
                else 
                    % Gilbert-Nocedal 
                    beta_FR = (g'*(g-g_old)) /(gotgo); 
                    beta_PR = (g'*g-gtgo)/(gotgo); 
                    beta = max(-beta_FR,min(beta_PR,beta_FR)); 
                end 
 
                d = -g + beta*d; 
 
                % Restart if not a direction of sufficient descent 
                if g'*d > -tolX 
                    if debug 
                        fprintf('Restarting CG\n'); 
                    end 
                    beta = 0; 
                    d = -g; 
                end 
 
                % Old restart rule: 
                %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0 
 
            end 
            g_old = g; 
 
        case PCG % Preconditioned Non-Linear Conjugate Gradient 
 
            % Apply preconditioner to negative gradient 
            if isempty(precFunc) 
                % Use L-BFGS Preconditioner 
                if i == 1 
                    old_dirs = zeros(length(g),0); 
                    old_stps = zeros(length(g),0); 
                    Hdiag = 1; 
                    s = -g; 
                else 
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 
 
                    if useMex 
                        s = lbfgsC(-g,old_dirs,old_stps,Hdiag); 
                    else 
                        s = lbfgs(-g,old_dirs,old_stps,Hdiag); 
                    end 
                end 
            else % User-supplied preconditioner 
                s = precFunc(-g,x,varargin{:}); 
            end 
 
            if i == 1 
                d = s; 
            else 
 
                if cgUpdate == 0 
                    % Preconditioned Fletcher-Reeves 
                    beta = (g'*s)/(g_old'*s_old); 
                elseif cgUpdate < 3 
                    % Preconditioned Polak-Ribiere 
                    beta = (g'*(s-s_old))/(g_old'*s_old); 
                else 
                    % Preconditioned Gilbert-Nocedal 
                    beta_FR = (g'*s)/(g_old'*s_old); 
                    beta_PR = (g'*(s-s_old))/(g_old'*s_old); 
                    beta = max(-beta_FR,min(beta_PR,beta_FR)); 
                end 
                d = s + beta*d; 
 
                if g'*d > -tolX 
                    if debug 
                        fprintf('Restarting CG\n'); 
                    end 
                    beta = 0; 
                    d = s; 
                end 
 
            end 
            g_old = g; 
            s_old = s; 
        case LBFGS % L-BFGS 
 
            % Update the direction and step sizes 
 
            if i == 1 
                d = -g; % Initially use steepest descent direction 
                old_dirs = zeros(length(g),0); 
                old_stps = zeros(length(d),0); 
                Hdiag = 1; 
            else 
                if Damped 
                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 
                else 
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 
                end 
 
                if useMex 
                    d = lbfgsC(-g,old_dirs,old_stps,Hdiag); 
                else 
                    d = lbfgs(-g,old_dirs,old_stps,Hdiag); 
                end 
            end 
            g_old = g; 
 
        case QNEWTON % Use quasi-Newton Hessian approximation 
 
            if i == 1 
                d = -g; 
            else 
                % Compute difference vectors 
                y = g-g_old; 
                s = t*d; 
 
                if i == 2 
                    % Make initial Hessian approximation 
                    if initialHessType == 0 
                        % Identity 
                        if qnUpdate <= 1 
                            R = eye(length(g)); 
                        else 
                            H = eye(length(g)); 
                        end 
                    else 
                        % Scaled Identity 
                        if debug 
                            fprintf('Scaling Initial Hessian Approximation\n'); 
                        end 
                        if qnUpdate <= 1 
                            % Use Cholesky of Hessian approximation 
                            R = sqrt((y'*y)/(y'*s))*eye(length(g)); 
                        else 
                            % Use Inverse of Hessian approximation 
                            H = eye(length(g))*(y'*s)/(y'*y); 
                        end 
                    end 
                end 
 
                if qnUpdate == 0 % Use BFGS updates 
                    Bs = R'*(R*s); 
                    if Damped 
                        eta = .02; 
                        if y'*s < eta*s'*Bs 
                            if debug 
                                fprintf('Damped Update\n'); 
                            end 
                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 
                            y = theta*y + (1-theta)*Bs; 
                        end 
                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 
                    else 
                        if y'*s > 1e-10 
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 
                        else 
                            if debug 
                                fprintf('Skipping Update\n'); 
                            end 
                        end 
                    end 
                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness 
 
                    Bs = R'*(R*s); 
                    ymBs = y-Bs; 
                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10 
                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-'); 
                    else 
                        if debug 
                            fprintf('SR1 not positive-definite, doing BFGS Update\n'); 
                        end 
                        if Damped 
                            eta = .02; 
                            if y'*s < eta*s'*Bs 
                                if debug 
                                    fprintf('Damped Update\n'); 
                                end 
                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 
                                y = theta*y + (1-theta)*Bs; 
                            end 
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 
                        else 
                            if y'*s > 1e-10 
                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 
                            else 
                                if debug 
                                    fprintf('Skipping Update\n'); 
                                end 
                            end 
                        end 
                    end 
                elseif qnUpdate == 2 % Use Hoshino update 
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 
                    phi = 1/(1 + (y'*H*y)/(s'*y)); 
                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v'; 
 
                elseif qnUpdate == 3 % Self-Scaling BFGS update 
                    ys = y'*s; 
                    Hy = H*y; 
                    yHy = y'*Hy; 
                    gamma = ys/yHy; 
                    v = sqrt(yHy)*(s/ys - Hy/yHy); 
                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys; 
                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update 
 
                    % Oren's method 
                    if (s'*y)/(y'*H*y) > 1 
                        phi = 1; % BFGS 
                        omega = 0; 
                    elseif (s'*(H\s))/(s'*y) < 1 
                        phi = 0; % DFP 
                        omega = 1; 
                    else 
                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2); 
                        omega = phi; 
                    end 
 
                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y); 
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 
                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y); 
 
                elseif qnUpdate == 5 % McCormick-Huang asymmetric update 
                    theta = 1; 
                    phi = 0; 
                    psi = 1; 
                    omega = 0; 
                    t1 = s*(theta*s + phi*H'*y)'; 
                    t2 = (theta*s + phi*H'*y)'*y; 
                    t3 = H*y*(psi*s + omega*H'*y)'; 
                    t4 = (psi*s + omega*H'*y)'*y; 
                    H = H + t1/t2 - t3/t4; 
                end 
 
                if qnUpdate <= 1 
                    d = -R\(R'\g); 
                else 
                    d = -H*g; 
                end 
 
            end 
            g_old = g; 
 
        case NEWTON0 % Hessian-Free Newton 
 
            cgMaxIter = min(p,maxFunEvals-funEvals); 
            cgForce = min(0.5,sqrt(norm(g)))*norm(g); 
 
            % Set-up preconditioner 
            precondFunc = []; 
            precondArgs = []; 
            if cgSolve == 1 
                if isempty(precFunc) % Apply L-BFGS preconditioner 
                    if i == 1 
                        old_dirs = zeros(length(g),0); 
                        old_stps = zeros(length(g),0); 
                        Hdiag = 1; 
                    else 
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 
                        if useMex 
                            precondFunc = @lbfgsC; 
                        else 
                            precondFunc = @lbfgs; 
                        end 
                        precondArgs = {old_dirs,old_stps,Hdiag}; 
                    end 
                    g_old = g; 
                else 
                    % Apply user-defined preconditioner 
                    precondFunc = precFunc; 
                    precondArgs = {x,varargin{:}}; 
                end 
            end 
 
            % Solve Newton system using cg and hessian-vector products 
            if isempty(HvFunc) 
                % No user-supplied Hessian-vector function, 
                % use automatic differentiation 
                HvFun = @autoHv; 
                HvArgs = {x,g,useComplex,funObj,varargin{:}}; 
            else 
                % Use user-supplied Hessian-vector function 
                HvFun = HvFunc; 
                HvArgs = {x,varargin{:}}; 
            end 
             
            if useNegCurv 
                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 
            else 
                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 
            end 
 
            funEvals = funEvals+cgIter; 
            if debug 
                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes); 
 
            end 
 
            if useNegCurv 
                if ~isempty(negCurv) 
                    %if debug 
                    fprintf('Using negative curvature direction\n'); 
                    %end 
                    d = negCurv/norm(negCurv); 
                    d = d/sum(abs(g)); 
                end 
            end 
 
        case NEWTON % Newton search direction: solve H*d = -g 

            if cgSolve == 0 
                % Direct factorization-based solve of the Newton system 
                if HessianModify == 0 
                    % Attempt to perform a Cholesky factorization of the Hessian 
                    [R,posDef] = chol(H); 

                    % If the Cholesky factorization was successful, then the Hessian is 
                    % positive definite, solve the system 
                    if posDef == 0 
                        d = -R\(R'\g); 

                    else 
                        % otherwise, adjust the Hessian to be positive definite based on the 
                        % minimum eigenvalue, and solve with QR 
                        % (expensive, we don't want to do this very much) 
                        if debug 
                            fprintf('Adjusting Hessian\n'); 
                        end 
                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H)))); 
                        d = -H\g; 
                    end 
                elseif HessianModify == 1 
                    % Modified Incomplete Cholesky 
                    R = mcholinc(H,debug); 
                    d = -R\(R'\g); 
                elseif HessianModify == 2 
                    % Modified Generalized Cholesky (H ~ L*D*L' after permutation) 
                    if useMex 
                        [L D perm] = mcholC(H); 
                    else 
                        [L D perm] = mchol(H); 
                    end 
                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm))); 

                elseif HessianModify == 3 
                    % Modified Spectral Decomposition: symmetrize H, then clamp 
                    % eigenvalue magnitudes away from zero before inverting 
                    [V,D] = eig((H+H')/2); 
                    D = diag(D); 
                    D = max(abs(D),max(max(abs(D)),1)*1e-12); 
                    d = -V*((V'*g)./D); 
                elseif HessianModify == 4 
                    % Modified Symmetric Indefinite (LDL') Factorization 
                    [L,D,perm] = ldl(H,'vector'); 
                    % blockPos holds the first row index of each 2x2 block in D 
                    [blockPos junk] = find(triu(D,1)); 
                    % Clamp 1x1 diagonal blocks to be at least 1e-12 
                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1) 
                        if D(diagInd,diagInd) < 1e-12 
                            D(diagInd,diagInd) = 1e-12; 
                        end 
                    end 
                    % Shift each 2x2 block by its smallest eigenvalue so it is PD 
                    for blockInd = blockPos' 
                        block = D(blockInd:blockInd+1,blockInd:blockInd+1); 
                        block_a = block(1); 
                        block_b = block(2); 
                        block_d = block(4); 
                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2; 
                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12); 
                    end 
                    d(perm) = -L' \ (D \ (L \ g(perm))); 
                else 
                    % Take Newton step if Hessian is pd, 
                    % otherwise take a step with negative curvature 
                    [R,posDef] = chol(H); 
                    if posDef == 0 
                        d = -R\(R'\g); 
                    else 
                        if debug 
                            fprintf('Taking Direction of Negative Curvature\n'); 
                        end 
                        [V,D] = eig(H); 
                        u = V(:,1); 
                        d = -sign(u'*g)*u; 
                    end 
                end 

            else 
                % Solve with Conjugate Gradient 
                cgMaxIter = p; 
                cgForce = min(0.5,sqrt(norm(g)))*norm(g); 

                % Select Preconditioner 
                if cgSolve == 1 
                    % No preconditioner 
                    precondFunc = []; 
                    precondArgs = []; 
                elseif cgSolve == 2 
                    % Diagonal preconditioner (clamped to be positive) 
                    precDiag = diag(H); 
                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag); 
                    precondFunc = @precondDiag; 
                    precondArgs = {precDiag.^-1}; 
                elseif cgSolve == 3 
                    % L-BFGS preconditioner (history built across iterations) 
                    if i == 1 
                        old_dirs = zeros(length(g),0); 
                        old_stps = zeros(length(g),0); 
                        Hdiag = 1; 
                    else 
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 
                    end 
                    g_old = g; 
                    if useMex 
                        precondFunc = @lbfgsC; 
                    else 
                        precondFunc = @lbfgs; 
                    end 
                    precondArgs = {old_dirs,old_stps,Hdiag}; 
                elseif cgSolve > 0 
                    % Symmetric Successive Overrelaxation Preconditioner 
                    % (omega is taken from the value of cgSolve itself) 
                    omega = cgSolve; 
                    D = diag(H); 
                    D(D < 1e-12) = 1e-12 - min(D); 
                    precDiag = (omega/(2-omega))*D.^-1; 
                    precTriu = diag(D/omega) + triu(H,1); 
                    precondFunc = @precondTriuDiag; 
                    precondArgs = {precTriu,precDiag.^-1}; 
                else 
                    % Incomplete Cholesky Preconditioner 
                    % (negative cgSolve values encode the drop tolerance) 
                    opts.droptol = -cgSolve; 
                    opts.rdiag = 1; 
                    R = cholinc(sparse(H),opts); 
                    if min(diag(R)) < 1e-12 
                        % BUGFIX: bare 'eye' is the scalar 1 in MATLAB, so the 
                        % original 'H + eye*(...)' shifted EVERY entry of H 
                        % instead of only the diagonal; use a sized identity 
                        % (same idiom as the HessianModify == 0 branch above) 
                        R = cholinc(sparse(H + eye(length(g))*(1e-12 - min(diag(R)))),opts); 
                    end 
                    precondFunc = @precondTriu; 
                    precondArgs = {R}; 
                end 

                % Run cg with the appropriate preconditioner 
                if isempty(HvFunc) 
                    % No user-supplied Hessian-vector function 
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs); 
                else 
                    % Use user-supplied Hessian-vector function 
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}}); 
                end 
                if debug 
                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes); 
                    %funEvals = funEvals + cgIter; 
                end 
            end 
 
        case TENSOR % Tensor Method 

            if numDiff 
                % Compute 3rd-order Tensor Numerically 
                [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:}); 
            else 
                % Use user-supplied 3rd-derivative Tensor 
                [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:}); 
            end 
            % Minimize the local 3rd-order Taylor model via a recursive, 
            % silent call to minFunc using Newton's method 
            options_sub.Method = 'newton'; 
            options_sub.Display = 'none'; 
            options_sub.TolX = tolX; 
            options_sub.TolFun = tolFun; 
            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T); 

            % Fall back to a modified 2nd-order Newton step if the tensor 
            % step is huge, vanishingly small, or not a descent direction 
            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX 
                if debug 
                    fprintf('Using 2nd-Order Step\n'); 
                end 
                % Modified spectral decomposition: symmetrize H and clamp 
                % eigenvalue magnitudes away from zero 
                [V,D] = eig((H+H')/2); 
                D = diag(D); 
                D = max(abs(D),max(max(abs(D)),1)*1e-12); 
                d = -V*((V'*g)./D); 
            else 
                if debug 
                    fprintf('Using 3rd-Order Step\n'); 
                end 
            end 
    end 
 
    % Abort if the direction contains NaN/Inf/complex entries 
    if ~isLegal(d) 
        fprintf('Step direction is illegal!\n'); 
        pause; 
        return 
    end 

    % ****************** COMPUTE STEP LENGTH ************************ 

    % Directional Derivative 
    gtd = g'*d; 

    % Check that progress can be made along direction 
    if gtd > -tolX 
        exitflag=2; 
        msg = 'Directional Derivative below TolX'; 
        break; 
    end 

    % Select Initial Guess 
    if i == 1 
        if method < NEWTON0 
            % First-order methods: scale first step by 1/||g||_1 
            t = min(1,1/sum(abs(g))); 
        else 
            t = 1; 
        end 
    else 
        if LS_init == 0 
            % Newton step 
            t = 1; 
        elseif LS_init == 1 
            % Close to previous step length 
            t = t*min(2,(gtd_old)/(gtd)); 
        elseif LS_init == 2 
            % Quadratic Initialization based on {f,g} and previous f 
            t = min(1,2*(f-f_old)/(gtd)); 
        elseif LS_init == 3 
            % Double previous step length 
            t = min(1,t*2); 
        elseif LS_init == 4 
            % Scaled step length if possible (uses curvature d'*H*d) 
            if isempty(HvFunc) 
                % No user-supplied Hessian-vector function, 
                % use automatic differentiation 
                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:}); 
            else 
                % Use user-supplied Hessian-vector function 
                dHd = d'*HvFunc(d,x,varargin{:}); 
            end 

            funEvals = funEvals + 1; 
            if dHd > 0 
                % Exact minimizer of the local quadratic model 
                t = -gtd/(dHd); 
            else 
                % Non-positive curvature: quadratic initialization instead 
                t = min(1,2*(f-f_old)/(gtd)); 
            end 
        end 

        % Guard against a non-positive initial step 
        if t <= 0 
            t = 1; 
        end 
    end 
    f_old = f; 
    gtd_old = gtd; 
 
    % Compute reference fr if using non-monotone objective 
    % (fr is the max over the last Fref function values; Fref == 1 gives 
    % the standard monotone sufficient-decrease reference) 
    if Fref == 1 
        fr = f; 
    else 
        if i == 1 
            old_fvals = repmat(-inf,[Fref 1]); 
        end 

        if i <= Fref 
            old_fvals(i) = f; 
        else 
            % Shift the history window and append the current value 
            old_fvals = [old_fvals(2:end);f]; 
        end 
        fr = max(old_fvals); 
    end 

    % Decide whether the user-supplied Hessian should be recomputed this 
    % iteration (only relevant for methods that use an explicit Hessian) 
    computeHessian = 0; 
    if method >= NEWTON 
        if HessianIter == 1 
            computeHessian = 1; 
        elseif i > 1 && mod(i-1,HessianIter) == 0 
            computeHessian = 1; 
        end 
    end 
 
    % Line Search 
    f_old = f; 
    if LS < 3 % Use Armijo Backtracking 
        % Perform Backtracking line search 
        % (ArmijoBacktrack also returns the updated iterate x) 
        if computeHessian 
            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 
        else 
            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:}); 
        end 
        funEvals = funEvals + LSfunEvals; 

    elseif LS < 6 
        % Find Point satisfying Wolfe 

        if computeHessian 
            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 
        else 
            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:}); 
        end 
        funEvals = funEvals + LSfunEvals; 
        % WolfeLineSearch returns only the step length; apply it here 
        x = x + t*d; 

    else 
        % Use Matlab optim toolbox line search 
        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=... 
            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,... 
            tolX,[],[],[],varargin{:}); 
        funEvals = funEvals + LSiter; 
        if isempty(t) 
            exitflag = -2; 
            msg = 'Matlab LineSearch failed'; 
            break; 
        end 

        if method >= NEWTON 
            % Re-evaluate at the accepted point to obtain the Hessian 
            [f_new,g_new,H] = funObj(x + t*d,varargin{:}); 
            funEvals = funEvals + 1; 
        end 
        x = x + t*d; 
        f = f_new; 
        g = g_new; 
    end 
 
    % Output iteration information 
    if verboseI 
        fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 
    end 

    % Append the same progress line to the log file, if one was given 
    if logfile 
        fid = fopen(logfile, 'a'); 
        if (fid > 0) 
            fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 
            fclose(fid); 
        end 
    end 

     
    % Output Function (user callback, invoked once per iteration) 
    if ~isempty(outputFcn) 
        callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 
    end 

    % Update Trace 
    trace.fval(end+1,1) = f; 
    trace.funcCount(end+1,1) = funEvals; 

    % Check Optimality Condition (L1-norm of the gradient) 
    if sum(abs(g)) <= tolFun 
        exitflag=1; 
        msg = 'Optimality Condition below TolFun'; 
        break; 
    end 

    % ******************* Check for lack of progress ******************* 

    if sum(abs(t*d)) <= tolX 
        exitflag=2; 
        msg = 'Step Size below TolX'; 
        break; 
    end 


    if abs(f-f_old) < tolX 
        exitflag=2; 
        msg = 'Function Value changing by less than TolX'; 
        break; 
    end 

    % ******** Check for going over iteration/evaluation limit ******************* 

    if funEvals*funEvalMultiplier > maxFunEvals 
        exitflag = 0; 
        msg = 'Exceeded Maximum Number of Function Evaluations'; 
        break; 
    end 

    if i == maxIter 
        exitflag = 0; 
        msg='Exceeded Maximum Number of Iterations'; 
        break; 
    end 
end 

% Print the termination message 
if verbose 
    fprintf('%s\n',msg); 
end 
% Assemble the output structure only when the caller requested it 
if nargout > 3 
    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,... 
        'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 
end 

% Output Function (final 'done' callback) 
if ~isempty(outputFcn) 
    callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 
end 

end