Some major changes to fix a bug and change behavior; most of the changes

are in tfocs_iterate and tfocs_initialize. To understand the changes, note that most algorithms use two sequences of variables, the "x" and the "y" sequence. The return value is the "x" iterate, but stopping criteria is via the "y" iterate often, since this value is in effect pre-computed. And if data recording was specified, this was done on the "x" variable at an additional cost. Now, the policy is to use whichever variable requries less computation (usually the "y" variable), but you can override this behavior if you wish: set opts.stopping_criteria_always_use_x = true to always use "x" to determine the stopping critier; set opts.data_collection_always_use_x = true to always use "x" for data collection; and set opts.output_always_use_x = true to always use "x" (and when appropriate, its corresponding dual variable). By default, we use whichever is cheaper, except for the output, in which case we use whichever gives a better objective value. Due to these major changes, many smaller changes were made to make the code work again. Also, the continuation script now uses the previous stepsize as the new stepsize. We have also added a new restart feature that uses the gradient, not function values. This is due to Brendan O'Donoghue's idea. This can be controlled by setting opts.autoRestart to either 'gra' or 'fun' for gradient or function criteria, respectively. These automatic restart criteria take effect whenever opts.restart < 0. Also new is opts.printRestart; if true (default), this will print output whenever a restart occurs.
ylp1332 · Dec 21, 2013 · 8420adc · 8420adc
1 parent 711f792
commit 8420adc
Show file tree

Hide file tree

Showing 6 changed files with 168 additions and 42 deletions.
diff --git a/continuation.m b/continuation.m
@@ -87,6 +87,7 @@
 % tol         = finalTol2*betaTol^(kMax+1); % bug fixed Feb 2011
 tol         = finalTol2*betaTol^(kMax);
 
+L0   = Inf; % for first round, use default value of stepsize
 xOld = x0;
 odataCumulative      = [];
 odataCumulative.niter= 0;
@@ -120,6 +121,9 @@
     if k == kMax && ~isempty( finalRestart )
         optsTemp.restart    = finalRestart; 
     end
+    if isfinite(L0)
+        optsTemp.L0     = L0;
+    end
 
     % call the solver
     [x, odata, optsOut ] = fcn( mu, x0, z0, optsTemp );
@@ -146,6 +150,9 @@
     % (possibly) decrease mu for next solve
     mu = mu*muDecrement;
 
+    % Dec '13, use stepsize estimate
+    L0  = 1/odata.stepsize(end);
+
     % update output data
     fields = { 'f', 'normGrad', 'stepsize','theta','counts','err' };
     for ff = fields

diff --git a/examples/smallscale/test_SLOPE.m b/examples/smallscale/test_SLOPE.m
@@ -82,7 +82,8 @@ min_x sum(lambda*sort(abs(x),'descend')) + .5||A(x)-b||_2^2
 
 %% Call the TFOCS solver
 er              = er_ref;  % error with reference solution (from IPM)
-opts = struct('restart',-Inf,'tol',1e-13,'maxits',1000, 'printEvery',10);
+opts = struct('restart',-Inf,'autoRestart','gradient', ...
+    'tol',1e-13,'maxits',1000, 'printEvery',10);
 opts.errFcn     = { @(f,primal) er(primal), ...
                     @(f,primal) f - obj_ref   }; 
 tic;

diff --git a/private/tfocs_cleanup.m b/private/tfocs_cleanup.m
@@ -2,29 +2,75 @@
 %   Performs the final set of operations for our templated solvers: performs
 %   final calculations, prints final outputs, cleans up unused data.
 
-if saddle,
-    if isempty(g_Ax),
-        [ f_x, g_Ax ] = apply_smooth(A_x);
+% This should be called just after tfocs_iterate, so re-use some of that
+%   computation:
+
+% We have two sequences of points, the x and the y sequence
+%   Other things being equal, we will use the x sequence as output
+%   But in some cases, if we use y, there is less computation to do
+%   since we can re-use some older computation. If it is the case that
+%   computing y is cheap, then we will still compute x, but now we will
+%   compare x and y and take whichever is better (ties going toward x)
+
+if exist('cur_dual','var')
+    cur_dual    = get_dual( cur_dual );
+end
+if v_is_y && ~output_always_use_x
+    % We have some free information about y
+    % cur_pri = y
+    if data_collection_always_use_x
+        % f_vy and dual_y have already been saved
+    else
+        f_vy    = f_v;
+        if saddle, dual_y  = cur_dual; end
+    end
+end
+if ~v_is_x
+    if saddle,
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual    = get_dual( g_Ax );
+    elseif isinf(f_x),
+        f_x = apply_smooth(A_x);
+    end
+    if isinf( C_x ),
+        C_x = apply_projector( x );
+    end
+    f_v         = maxmin * ( f_x + C_x );
+    cur_pri     = x;
+end
+% Now, compare x and y if info on y is available, and take whichever is better
+x_or_y_string = 'x';
+if v_is_y && ~output_always_use_x
+    if f_vy < f_v
+        f_v     = f_vy;
+        if saddle, cur_dual= dual_y; end
+        x       = y; % losing information after this point!
+        x_or_y_string = 'y';
+    end
+end
+if saddle
+    if ~exist('cur_dual','var')
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual    = get_dual( g_Ax );
     end
-    out.dual = get_dual( g_Ax );
-    % Oct 13: make it compatible:
+    out.dual    = cur_dual;
     if isa( out.dual, 'tfocs_tuple')
         out.dual = cell( out.dual );
     end
-elseif isinf(f_x),
-    f_x = apply_smooth(A_x);
 end
-if isinf( C_x ),
-    C_x = apply_projector( x );
-end    
 if fid && printEvery,
 	fprintf( fid, 'Finished: %s\n', status );
 end
 out.niter = n_iter;
 out.status = status;
+out.x_or_y = x_or_y_string;
 d.niter = 'Number of iterations';
 if saveHist,
-    out.f(n_iter) = maxmin * ( f_x + C_x );
+    out.f(n_iter) = f_v;
     out.f(n_iter+1:end) = [];
     out.normGrad(n_iter+1:end) = [];
     out.stepsize(n_iter+1:end) = [];

diff --git a/private/tfocs_initialize.m b/private/tfocs_initialize.m
@@ -28,9 +28,14 @@
     'alg',        'AT'  ,...
     'restart',    Inf   ,...
     'printStopCrit', false,...
-    'cntr_reset',  50   ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
+    'cntr_reset',  -50   ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
     'cg_restart', Inf   ,... % for CG only
     'cg_type',    'pr'  ,...  % for CG only
+    'stopping_criteria_always_use_x',   false, ...
+    'data_collection_always_use_x',     false, ...
+    'output_always_use_x',              false,  ...
+    'autoRestart',  'gra', ... % function or gradient
+    'printRestart', true, ...
     'debug',      false ....
     );
 
@@ -69,6 +74,7 @@
         desc.errFcn = 'Medium. User-specified error function. See user guide';
         desc.beta   = 'Medium. Backtracking parameter, in (0,1). No line search if >= 1';
         desc.alpha  = 'Medium. Line search increase parameter, in (0,1)';
+        desc.autoRestart= 'Medium. Set to ''gra'' or ''fun'' to choose behavior when restart<0';
 
 
         desc.maxCounts  = 'Advanced. Vector that fine-tunes various types of iteration limits; same form as countOps';
@@ -78,9 +84,14 @@
         desc.stopFcn    = 'Advanced. User-supplied stopping criteria. See user guide';
         desc.stopCrit   = 'Advanced. Controls which stopping criteria to use; 1,2, 3 or 4.';
         desc.printStopCrit = 'Advanced. Controls whether to display the value used in the stopping criteria';
-        desc.cntr_reset = 'Advanced. Controls how often to reset some numerical computatios to avoid roundoff';
+        desc.printRestart  = 'Advanced. Whether to signal when a restart happens';
+        desc.cntr_reset = 'Advanced. Controls how often to reset some numerical computations to avoid roundoff';
         desc.debug = 'Advanced.  Turns on more useful error messages';
 
+        desc.stopping_criteria_always_use_x = 'Advanced. Forces usage of x, never y, in stopping crit.';
+        desc.data_collection_always_use_x   = 'Advanced. Forces usage of x, nevery y, in recording errors.';
+        desc.output_always_use_x            = 'Advanced. Forces output of x, never y. Default: uses whichever is better';
+
         desc.adjoint = 'Internal.';
         desc.saddle = 'Internal.  Used by TFOCS_SCD';
 
@@ -186,6 +197,14 @@
 if any(odef.maxCounts<Inf),
     odef.countOps = true;
 end
+% If cntr_reset is not set (signfied by being negative), set to defaults
+if odef.cntr_reset < 0
+    odef.cntr_reset = round(abs(odef.cntr_reset));
+    % and if we requested high precision, change the default
+    if odef.tol < 1e-12
+        odef.cntr_reset = 10; 
+    end
+end
 
 % Now move the options into the current workspace
 for k = def_fields,
@@ -551,7 +570,7 @@
 % Initialize the iterate values
 y    = x;    z    = x;
 A_y  = A_x;  A_z  = A_x;
-C_y  = C_x;  C_z  = C_x;
+C_y  = Inf;  C_z  = C_x;
 f_y  = f_x;  f_z  = f_x;
 g_y  = g_x;  g_z  = g_x;
 g_Ay = g_Ax; g_Az = g_Ax;
@@ -592,6 +611,9 @@
     fprintf(fid,'\n');
 end
 
+% Initialize some variables
+just_restarted = false;
+
 % TFOCS v1.3 by Stephen Becker, Emmanuel Candes, and Michael Grant.
 % Copyright 2013 California Institute of Technology and CVX Research.
 % See the file LICENSE for full license information.

diff --git a/private/tfocs_iterate.m b/private/tfocs_iterate.m
@@ -51,23 +51,33 @@
 % minimizes them, favoring x in the case of a tie.
 %
 
+v_is_x  = false;
+v_is_y  = false;
 % if isempty(status) && ( ~isempty(stopFcn) || restart < 0 || stopCrit == 3 || stopCrit ==4 ),
 if (isempty(status) || ~isempty(findstr(status,'limit')) ) ...
         && ( ~isempty(stopFcn) || restart < 0 || stopCrit == 3 || stopCrit ==4 ),
-    comp_x = [ isinf(f_x), saddle*~isempty(stopFcn)*isempty(g_Ax), isinf(C_x) ];
-    comp_y = [ isinf(f_y), saddle*~isempty(stopFcn)*isempty(g_Ay), isinf(C_y) ];
-    if sum(comp_x) <= sum(comp_y),
+    need_dual   = saddle && (~isempty(stopFcn) || stopCrit == 3 || stopCrit == 4 );
+    comp_x = [ isinf(f_x), need_dual*isempty(g_Ax), isinf(C_x) ];
+    comp_y = [ isinf(f_y), need_dual*isempty(g_Ay), isinf(C_y) ];
+    if sum(comp_x) <= sum(comp_y) || stopping_criteria_always_use_x,
         if comp_x(2), [f_x,g_Ax] = apply_smooth(A_x);
         elseif comp_x(1), f_x = apply_smooth(A_x); end
         if comp_x(3), C_x = apply_projector(x); end
         cur_pri = x; cur_dual = g_Ax;
         f_v = maxmin*(f_x+C_x);
+        v_is_x    = true; % 12/18/2013
     else
         if comp_y(2), [f_y,g_Ay] = apply_smooth(A_y);
         elseif comp_y(1), f_y = apply_smooth(A_y); end
         if comp_y(3), C_y = apply_projector(y); end
         cur_pri = y; cur_dual = g_Ay;
         f_v = maxmin*(f_y+C_y);
+        v_is_y    = true;
+        if data_collection_always_use_x
+            % save the data, otherwise it is overwritten
+            f_vy    = f_v;
+            dual_y  = cur_dual;
+        end
     end
     for err_j = 1 : numel(stopFcn),
         if saddle,
@@ -76,7 +86,8 @@
             stop = stopFcn{err_j}(f_v,cur_pri);
         end
         if stop
-            status = sprintf('Reached user''s supplied stopping criteria no. %d',err_j);
+            if v_is_x, x_or_y_string = 'x'; else x_or_y_string = 'y'; end
+            status = sprintf('Reached user''s supplied stopping criteria no. %d using %s variable',err_j,x_or_y_string);
         end
     end
 end
@@ -118,7 +129,13 @@
             d_dual = Inf;
         end
     end
-    if d_dual < tol
+    nLargeEnough = (n_iter > 2); % Dec '13
+    if restart > 10
+        nLargeEnough = (n_iter - restart_iter > 2);
+    end
+    if d_dual < tol  && nLargeEnough
+        % Problems when cur_dual is base on x one iteration
+        % and y another iteration
         status = 'Step size tolerance reached';
     end
 end
@@ -133,36 +150,50 @@
 % the solution at the end of the algorithm.
 %
 
-will_print = fid && printEvery && ( ~isempty( status ) || ~mod( n_iter, printEvery ) );
+will_print = fid && printEvery && ...
+    ( ~isempty( status ) || ~mod( n_iter, printEvery ) || (printRestart && just_restarted) );
 if saveHist || will_print,
-    f_x_save = f_x;
-    g_Ax_save = g_Ax;
-    if ~isempty(errFcn) && saddle,
-        if isempty(g_Ax),
-            [ f_x, g_Ax ] = smoothF( A_x );
+    % Which point to collect data at? Dec '13, by default, collect data
+    %   at the same point used to find f_v unless data_collection_always_use_x
+    % There is also the chance that the f_v wasn't calculated at all
+    if ( data_collection_always_use_x && ~v_is_x ) || ( ~v_is_x && ~v_is_y )
+        f_x_save = f_x;
+        g_Ax_save = g_Ax;
+        if ~isempty(errFcn) && saddle,
+            if isempty(g_Ax),
+                [ f_x, g_Ax ] = smoothF( A_x );
+            end
+%             out.dual = get_dual( g_Ax );
+            cur_dual = g_Ax ;
         end
-        out.dual = get_dual( g_Ax );
-    end
-    if isinf(f_x),
-        f_x = smoothF( A_x );
-    end
-    if isinf( C_x ),
-        C_x = projectorF( x ); 
+        if isinf(f_x),
+            f_x = smoothF( A_x );
+        end
+        if isinf( C_x ),
+            C_x = projectorF( x );
+        end
+        f_v         = maxmin * ( f_x + C_x );
+        cur_pri     = x;
+        v_is_x      = true;
+        % Now undo any calculations
+        f_x         = f_x_save;
+        g_Ax        = g_Ax_save;
     end
-    f_w = maxmin * ( f_x + C_x );
+    % (otherwise, f_v was already calculated)
+
     if ~isempty(errFcn) && iscell(errFcn)
         for err_j = 1 : numel(errFcn),
             if saddle,
-                errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+%                 errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri,get_dual(cur_dual));
             else
-                errs(err_j) = errFcn{err_j}(f_w,x);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri);
             end
         end
     end
-    f_x = f_x_save;
-    g_Ax = g_Ax_save;
 end
 
+
 % Register a warning if the step size suggests a Lipschitz violation
 if isempty(status) && ( beta < 1 && backtrack_simple && localL > Lexact ),
     warning_lipschitz = true;
@@ -179,7 +210,7 @@
         bchar = '*'; 
     end
 	fprintf( fid, '%-4d| %+12.5e  %8.2e  %8.2e%c', ...
-        n_iter, f_w, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
+        n_iter, f_v, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
     if countOps,
         fprintf( fid, '|' );
         fprintf( fid, ' %5d', tfocs_count___ );
@@ -223,6 +254,10 @@
         fprintf( fid, ' %8.2e', stopResid );
     end
 
+    if printRestart && just_restarted
+        fprintf( fid, ' | restarted');
+    end
+
 	fprintf( fid, '\n');
 end
 
@@ -241,7 +276,7 @@
             out.err(end+1:csize,:) = 0;
         end
     end
-    out.f(n_iter) = f_w;
+    out.f(n_iter) = f_v;
     out.theta(n_iter) = theta;
     out.stepsize(n_iter) = 1 / L;
     out.normGrad(n_iter) = norm_dx;
@@ -265,18 +300,33 @@
 % Fixed it.
 % two changes: (1) test is now ( maxmin*f_v > maxmin*f_v_old)
 %              (2) to reset f_v_old, set to maxmin*Inf, not just +Inf
-if n_iter - restart_iter == abs(round(restart)) || (restart < 0 && maxmin*f_v > maxmin*f_v_old),
+% Dec 2013, adding gradient-based restarting (see O'Donoghue and Candes '12)
+just_restarted  = false;
+do_auto_restart = false;
+if restart < 0
+    if strfind(lower(autoRestart),'gra')
+        do_auto_restart = tfocs_dot(g_Ay, A_x - A_x_old ) > 0;
+    elseif any(strfind(lower(autoRestart),'fun')) || any(strfind(lower(autoRestart),'obj'))
+        do_auto_restart = maxmin*f_v > maxmin*f_v_old;
+    else
+        error('bad value for opts.autoRestart. Should be ''gradient'' or ''function''');
+    end
+end
+if n_iter - restart_iter == abs(round(restart)) || do_auto_restart
     restart_iter = n_iter;
     backtrack_simple = true;
 	theta = Inf;
     y = x; A_y = A_x; f_y = f_x; g_Ay = g_Ax; g_y = g_x; C_y = C_x;
     z = x; A_z = A_x; f_z = f_x; g_Az = g_Ax; g_z = g_x; C_z = C_x;
     f_v_old = maxmin*Inf; % important!
 %     continue;
+    just_restarted = true;
 elseif restart < 0,
     f_v_old = f_v;
 end
 
+C_y     = Inf;
+
 % TFOCS v1.3 by Stephen Becker, Emmanuel Candes, and Michael Grant.
 % Copyright 2013 California Institute of Technology and CVX Research.
 % See the file LICENSE for full license information.