From 8420adcb97b09fe69d59c2d248bb76fd994e21ce Mon Sep 17 00:00:00 2001
From: Stephen Becker
Date: Sat, 21 Dec 2013 17:58:29 -0500
Subject: [PATCH] Some major changes to fix a bug and change behavior; most of
 the changes are in tfocs_iterate and tfocs_initialize.

To understand the changes, note that most algorithms use two sequences of
variables, the "x" and the "y" sequence. The return value is the "x" iterate,
but the stopping criteria are often evaluated on the "y" iterate, since that
value is in effect pre-computed. If data recording was requested, it was done
on the "x" variable at an additional cost. The new policy is to use whichever
variable requires less computation (usually the "y" variable), but you can
override this behavior if you wish: set
opts.stopping_criteria_always_use_x = true to always use "x" for the stopping
criteria; set opts.data_collection_always_use_x = true to always use "x" for
data collection; and set opts.output_always_use_x = true to always output "x"
(and, when appropriate, its corresponding dual variable). By default, we use
whichever is cheaper, except for the output, where we use whichever gives the
better objective value. Due to these major changes, many smaller changes were
made to keep the code working.

Also, the continuation script now re-uses the previous stepsize as the new
starting stepsize.

We have also added a new restart feature that uses the gradient rather than
function values, following an idea of Brendan O'Donoghue. This is controlled
by setting opts.autoRestart to either 'gra' or 'fun' for the gradient or
function criterion, respectively. These automatic restart criteria take effect
whenever opts.restart < 0. Also new is opts.printRestart; if true (the
default), output is printed whenever a restart occurs.
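As a rough usage sketch of the new options (the solver call and the problem
objects smoothF, affineF, projectorF, x0 below are placeholders, not part of
this patch):

    % Restart on the gradient criterion; keep the default cheap x/y policy,
    % but force the stopping test to be evaluated on the "x" iterate.
    opts = struct('restart', -Inf, 'autoRestart', 'gra', 'printRestart', true);
    opts.stopping_criteria_always_use_x = true;
    % opts.output_always_use_x = true;  % uncomment to always return the x iterate
    [x, odata, optsOut] = tfocs( smoothF, affineF, projectorF, x0, opts );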
---
 continuation.m                   |   7 +++
 examples/smallscale/test_SLOPE.m |   3 +-
 private/tfocs_cleanup.m          |  68 +++++++++++++++++----
 private/tfocs_initialize.m       |  28 ++++++++-
 private/tfocs_iterate.m          | 102 +++++++++++++++++++++++--------
 tfocs.m                          |   2 +-
 6 files changed, 168 insertions(+), 42 deletions(-)

diff --git a/continuation.m b/continuation.m
index 8e32292..0699b49 100644
--- a/continuation.m
+++ b/continuation.m
@@ -87,6 +87,7 @@
 %     tol = finalTol2*betaTol^(kMax+1); % bug fixed Feb 2011
     tol = finalTol2*betaTol^(kMax);
 
+L0  = Inf; % for first round, use default value of stepsize
 xOld                  = x0;
 odataCumulative       = [];
 odataCumulative.niter= 0;
@@ -120,6 +121,9 @@
     if k == kMax && ~isempty( finalRestart )
         optsTemp.restart = finalRestart;
     end
+    if isfinite(L0)
+        optsTemp.L0 = L0;
+    end
 
     % call the solver
     [x, odata, optsOut ] = fcn( mu, x0, z0, optsTemp );
@@ -146,6 +150,9 @@
     % (possibly) decrease mu for next solve
     mu      = mu*muDecrement;
 
+    % Dec '13, use stepsize estimate
+    L0      = 1/odata.stepsize(end);
+
     % update output data
     fields = { 'f', 'normGrad', 'stepsize','theta','counts','err' };
     for ff = fields
diff --git a/examples/smallscale/test_SLOPE.m b/examples/smallscale/test_SLOPE.m
index 2b12b8d..6cc8692 100644
--- a/examples/smallscale/test_SLOPE.m
+++ b/examples/smallscale/test_SLOPE.m
@@ -82,7 +82,8 @@ min_x sum(lambda*sort(abs(x),'descend')) + .5||A(x)-b||_2^2
 %% Call the TFOCS solver
 er   = er_ref; % error with reference solution (from IPM)
-opts = struct('restart',-Inf,'tol',1e-13,'maxits',1000, 'printEvery',10);
+opts = struct('restart',-Inf,'autoRestart','gradient', ...
+    'tol',1e-13,'maxits',1000, 'printEvery',10);
 opts.errFcn = { @(f,primal) er(primal), ...
                 @(f,primal) f - obj_ref };
 tic;
diff --git a/private/tfocs_cleanup.m b/private/tfocs_cleanup.m
index 3f35689..96eed93 100644
--- a/private/tfocs_cleanup.m
+++ b/private/tfocs_cleanup.m
@@ -2,29 +2,75 @@
 % Performs the final set of operations for our templated solvers: performs
 % final calculations, prints final outputs, cleans up unused data.
 
-if saddle,
-    if isempty(g_Ax),
-        [ f_x, g_Ax ] = apply_smooth(A_x);
+% This should be called just after tfocs_iterate, so re-use some of that
+% computation:
+
+% We have two sequences of points, the x and the y sequence
+% Other things being equal, we will use the x sequence as output
+% But in some cases, if we use y, there is less computation to do
+% since we can re-use some older computation. If it is the case that
+% computing y is cheap, then we will still compute x, but now we will
+% compare x and y and take whichever is better (ties going toward x)
+
+if exist('cur_dual','var')
+    cur_dual = get_dual( cur_dual );
+end
+if v_is_y && ~output_always_use_x
+    % We have some free information about y
+    % cur_pri = y
+    if data_collection_always_use_x
+        % f_vy and dual_y have already been saved
+    else
+        f_vy = f_v;
+        if saddle, dual_y = cur_dual; end
+    end
+end
+if ~v_is_x
+    if saddle,
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual = get_dual( g_Ax );
+    elseif isinf(f_x),
+        f_x = apply_smooth(A_x);
+    end
+    if isinf( C_x ),
+        C_x = apply_projector( x );
+    end
+    f_v     = maxmin * ( f_x + C_x );
+    cur_pri = x;
+end
+% Now, compare x and y if info on y is available, and take whichever is better
+x_or_y_string = 'x';
+if v_is_y && ~output_always_use_x
+    if f_vy < f_v
+        f_v = f_vy;
+        if saddle, cur_dual = dual_y; end
+        x   = y; % losing information after this point!
+        x_or_y_string = 'y';
+    end
+end
+if saddle
+    if ~exist('cur_dual','var')
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual = get_dual( g_Ax );
     end
-    out.dual = get_dual( g_Ax );
-    % Oct 13: make it compatible:
+    out.dual = cur_dual;
     if isa( out.dual, 'tfocs_tuple')
         out.dual = cell( out.dual );
     end
-elseif isinf(f_x),
-    f_x = apply_smooth(A_x);
 end
-if isinf( C_x ),
-    C_x = apply_projector( x );
-end
 if fid && printEvery,
     fprintf( fid, 'Finished: %s\n', status );
 end
 out.niter  = n_iter;
 out.status = status;
+out.x_or_y = x_or_y_string;
 d.niter    = 'Number of iterations';
 if saveHist,
-    out.f(n_iter) = maxmin * ( f_x + C_x );
+    out.f(n_iter) = f_v;
     out.f(n_iter+1:end)        = [];
     out.normGrad(n_iter+1:end) = [];
     out.stepsize(n_iter+1:end) = [];
diff --git a/private/tfocs_initialize.m b/private/tfocs_initialize.m
index 4b15eb9..368df86 100644
--- a/private/tfocs_initialize.m
+++ b/private/tfocs_initialize.m
@@ -28,9 +28,14 @@
     'alg',           'AT' ,...
     'restart',       Inf ,...
     'printStopCrit', false,...
-    'cntr_reset',    50 ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
+    'cntr_reset',    -50 ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
     'cg_restart',    Inf ,... % for CG only
     'cg_type',       'pr' ,... % for CG only
+    'stopping_criteria_always_use_x', false, ...
+    'data_collection_always_use_x',   false, ...
+    'output_always_use_x',            false, ...
+    'autoRestart',   'gra', ... % function or gradient
+    'printRestart',  true, ...
     'debug',         false ....
     );
@@ -69,6 +74,7 @@
     desc.errFcn = 'Medium. User-specified error function. See user guide';
     desc.beta   = 'Medium. Backtracking parameter, in (0,1). No line search if >= 1';
     desc.alpha  = 'Medium. Line search increase parameter, in (0,1)';
+    desc.autoRestart= 'Medium. Set to ''gra'' or ''fun'' to choose behavior when restart<0';
     desc.maxCounts  = 'Advanced. Vector that fine-tunes various types of iteration limits; same form as countOps';
@@ -78,9 +84,14 @@
     desc.stopFcn       = 'Advanced. User-supplied stopping criteria. See user guide';
     desc.stopCrit      = 'Advanced. Controls which stopping criteria to use; 1,2, 3 or 4.';
     desc.printStopCrit = 'Advanced. Controls whether to display the value used in the stopping criteria';
-    desc.cntr_reset    = 'Advanced. Controls how often to reset some numerical computatios to avoid roundoff';
+    desc.printRestart  = 'Advanced. Whether to signal when a restart happens';
+    desc.cntr_reset    = 'Advanced. Controls how often to reset some numerical computations to avoid roundoff';
     desc.debug         = 'Advanced. Turns on more useful error messages';
+    desc.stopping_criteria_always_use_x = 'Advanced. Forces usage of x, never y, in stopping crit.';
+    desc.data_collection_always_use_x   = 'Advanced. Forces usage of x, never y, in recording errors.';
+    desc.output_always_use_x            = 'Advanced. Forces output of x, never y. Default: uses whichever is better';
+
     desc.adjoint       = 'Internal.';
     desc.saddle        = 'Internal. Used by TFOCS_SCD';
@@ -186,6 +197,14 @@ if any(odef.maxCounts
+            nLargeEnough    = (n_iter > 2); % Dec '13
+            if restart > 10
+                nLargeEnough = (n_iter - restart_iter > 2);
+            end
+            if d_dual < tol && nLargeEnough
+                % Problems when cur_dual is based on x one iteration
+                %   and y another iteration
                 status = 'Step size tolerance reached';
             end
         end
@@ -133,36 +150,50 @@
 % the solution at the end of the algorithm.
 %
-will_print = fid && printEvery && ( ~isempty( status ) || ~mod( n_iter, printEvery ) );
+will_print = fid && printEvery && ...
+    ( ~isempty( status ) || ~mod( n_iter, printEvery ) || (printRestart && just_restarted) );
 if saveHist || will_print,
-    f_x_save  = f_x;
-    g_Ax_save = g_Ax;
-    if ~isempty(errFcn) && saddle,
-        if isempty(g_Ax),
-            [ f_x, g_Ax ] = smoothF( A_x );
+    % Which point to collect data at? Dec '13, by default, collect data
+    % at the same point used to find f_v unless data_collection_always_use_x
+    % There is also the chance that the f_v wasn't calculated at all
+    if ( data_collection_always_use_x && ~v_is_x ) || ( ~v_is_x && ~v_is_y )
+        f_x_save  = f_x;
+        g_Ax_save = g_Ax;
+        if ~isempty(errFcn) && saddle,
+            if isempty(g_Ax),
+                [ f_x, g_Ax ] = smoothF( A_x );
+            end
+%             out.dual = get_dual( g_Ax );
+            cur_dual = g_Ax ;
         end
-        out.dual = get_dual( g_Ax );
-    end
-    if isinf(f_x),
-        f_x = smoothF( A_x );
-    end
-    if isinf( C_x ),
-        C_x = projectorF( x );
+        if isinf(f_x),
+            f_x = smoothF( A_x );
+        end
+        if isinf( C_x ),
+            C_x = projectorF( x );
+        end
+        f_v     = maxmin * ( f_x + C_x );
+        cur_pri = x;
+        v_is_x  = true;
+        % Now undo any calculations
+        f_x  = f_x_save;
+        g_Ax = g_Ax_save;
     end
-    f_w = maxmin * ( f_x + C_x );
+    % (otherwise, f_v was already calculated)
+
     if ~isempty(errFcn) && iscell(errFcn)
         for err_j = 1 : numel(errFcn),
             if saddle,
-                errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+%                 errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri,get_dual(cur_dual));
             else
-                errs(err_j) = errFcn{err_j}(f_w,x);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri);
             end
         end
     end
-    f_x  = f_x_save;
-    g_Ax = g_Ax_save;
 end
+
 % Register a warning if the step size suggests a Lipschitz violation
 if isempty(status) && ( beta < 1 && backtrack_simple && localL > Lexact ),
     warning_lipschitz = true;
@@ -179,7 +210,7 @@
         bchar = '*';
     end
     fprintf( fid, '%-4d| %+12.5e %8.2e %8.2e%c', ...
-        n_iter, f_w, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
+        n_iter, f_v, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
     if countOps,
         fprintf( fid, '|' );
         fprintf( fid, ' %5d', tfocs_count___ );
@@ -223,6 +254,10 @@
         fprintf( fid, ' %8.2e', stopResid );
     end
+    if printRestart && just_restarted
+        fprintf( fid, ' | restarted');
+    end
+
     fprintf( fid, '\n');
 end
@@ -241,7 +276,7 @@
             out.err(end+1:csize,:) = 0;
         end
     end
-    out.f(n_iter) = f_w;
+    out.f(n_iter) = f_v;
     out.theta(n_iter)    = theta;
     out.stepsize(n_iter) = 1 / L;
     out.normGrad(n_iter) = norm_dx;
@@ -265,7 +300,19 @@
 % Fixed it.
 %   two changes: (1) test is now ( maxmin*f_v > maxmin*f_v_old)
 %   (2) to reset f_v_old, set to maxmin*Inf, not just +Inf
-if n_iter - restart_iter == abs(round(restart)) || (restart < 0 && maxmin*f_v > maxmin*f_v_old),
+% Dec 2013, adding gradient-based restarting (see O'Donoghue and Candes '12)
+just_restarted  = false;
+do_auto_restart = false;
+if restart < 0
+    if strfind(lower(autoRestart),'gra')
+        do_auto_restart = tfocs_dot(g_Ay, A_x - A_x_old ) > 0;
+    elseif any(strfind(lower(autoRestart),'fun')) || any(strfind(lower(autoRestart),'obj'))
+        do_auto_restart = maxmin*f_v > maxmin*f_v_old;
+    else
+        error('bad value for opts.autoRestart. Should be ''gradient'' or ''function''');
+    end
+end
+if n_iter - restart_iter == abs(round(restart)) || do_auto_restart
     restart_iter     = n_iter;
     backtrack_simple = true;
     theta            = Inf;
@@ -273,10 +320,13 @@
     z = x; A_z = A_x; f_z = f_x; g_Az = g_Ax; g_z = g_x; C_z = C_x;
     f_v_old = maxmin*Inf; % important!
 %     continue;
+    just_restarted = true;
 elseif restart < 0,
     f_v_old = f_v;
 end
+C_y = Inf;
+
 % TFOCS v1.3 by Stephen Becker, Emmanuel Candes, and Michael Grant.
 % Copyright 2013 California Institute of Technology and CVX Research.
 % See the file LICENSE for full license information.
diff --git a/tfocs.m b/tfocs.m
index b51f4a3..492ca77 100644
--- a/tfocs.m
+++ b/tfocs.m
@@ -82,7 +82,7 @@
         || strcmpi(smoothF,'-v') || strcmpi(smoothF,'-version') )
     % Display version information
     % type version.txt
-    disp('TFOCS v1.3 1.1, December 2011');
+    disp('TFOCS v1.3, October 2013');
     return
 end
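For reference, a minimal sketch of a continuation call that would pick up the
stepsize warm-start added above, assuming the continuation(fcn, mu, x0, z0, opts)
interface and the solver_sBPDN(A, b, epsilon, mu, x0, z0, opts) solver; the
problem data A, b, epsilon, mu, x0, z0 and the option values are placeholders:

    % Each continuation round now starts from L0 = 1/odata.stepsize(end)
    % of the previous round instead of the default stepsize guess.
    fcn  = @(mu, x0, z0, opts) solver_sBPDN( A, b, epsilon, mu, x0, z0, opts );
    opts = struct('restart', -Inf, 'autoRestart', 'gra', 'printRestart', true);
    [x, odata, optsOut] = continuation( fcn, mu, x0, z0, opts );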